1 /* Function summary pass.
2 Copyright (C) 2003-2021 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* Analysis of function bodies used by inter-procedural passes
22
23 We estimate for each function
24 - function body size and size after specializing into given context
25 - average function execution time in a given context
26 - function frame size
27 For each call
28 - call statement size, time and how often the parameters change
29
30 ipa_fn_summary data structures store above information locally (i.e.
31 parameters of the function itself) and globally (i.e. parameters of
32 the function created by applying all the inline decisions already
33 present in the callgraph).
34
35 We provide access to the ipa_fn_summary data structure and
36 basic logic updating the parameters when inlining is performed.
37
38 The summaries are context sensitive. Context means
39 1) partial assignment of known constant values of operands
40 2) whether function is inlined into the call or not.
41 It is easy to add more variants. To represent function size and time
42 that depends on context (i.e. it is known to be optimized away when
43 context is known either by inlining or from IP-CP and cloning),
44 we use predicates.
45
46 estimate_edge_size_and_time can be used to query
47 function size/time in the given context. ipa_merge_fn_summary_after_inlining merges
48 properties of caller and callee after inlining.
49
50 Finally pass_inline_parameters is exported. This is used to drive
   computation of function parameters used by the early inliner.  IPA
   inliner performs analysis via its analyze_function method.  */
53
54 #include "config.h"
55 #define INCLUDE_VECTOR
56 #include "system.h"
57 #include "coretypes.h"
58 #include "backend.h"
59 #include "tree.h"
60 #include "gimple.h"
61 #include "alloc-pool.h"
62 #include "tree-pass.h"
63 #include "ssa.h"
64 #include "tree-streamer.h"
65 #include "cgraph.h"
66 #include "diagnostic.h"
67 #include "fold-const.h"
68 #include "print-tree.h"
69 #include "tree-inline.h"
70 #include "gimple-pretty-print.h"
71 #include "cfganal.h"
72 #include "gimple-iterator.h"
73 #include "tree-cfg.h"
74 #include "tree-ssa-loop-niter.h"
75 #include "tree-ssa-loop.h"
76 #include "symbol-summary.h"
77 #include "ipa-prop.h"
78 #include "ipa-fnsummary.h"
79 #include "cfgloop.h"
80 #include "tree-scalar-evolution.h"
81 #include "ipa-utils.h"
82 #include "cfgexpand.h"
83 #include "gimplify.h"
84 #include "stringpool.h"
85 #include "attribs.h"
86 #include "tree-into-ssa.h"
87 #include "symtab-clones.h"
88 #include "gimple-range.h"
89 #include "tree-dfa.h"
90
91 /* Summaries. */
92 fast_function_summary <ipa_fn_summary *, va_gc> *ipa_fn_summaries;
93 fast_function_summary <ipa_size_summary *, va_heap> *ipa_size_summaries;
94 fast_call_summary <ipa_call_summary *, va_heap> *ipa_call_summaries;
95
/* Edge predicates go here.  */
97 static object_allocator<ipa_predicate> edge_predicate_pool ("edge predicates");
98
99
100 /* Dump IPA hints. */
101 void
ipa_dump_hints(FILE * f,ipa_hints hints)102 ipa_dump_hints (FILE *f, ipa_hints hints)
103 {
104 if (!hints)
105 return;
106 fprintf (f, "IPA hints:");
107 if (hints & INLINE_HINT_indirect_call)
108 {
109 hints &= ~INLINE_HINT_indirect_call;
110 fprintf (f, " indirect_call");
111 }
112 if (hints & INLINE_HINT_loop_iterations)
113 {
114 hints &= ~INLINE_HINT_loop_iterations;
115 fprintf (f, " loop_iterations");
116 }
117 if (hints & INLINE_HINT_loop_stride)
118 {
119 hints &= ~INLINE_HINT_loop_stride;
120 fprintf (f, " loop_stride");
121 }
122 if (hints & INLINE_HINT_same_scc)
123 {
124 hints &= ~INLINE_HINT_same_scc;
125 fprintf (f, " same_scc");
126 }
127 if (hints & INLINE_HINT_in_scc)
128 {
129 hints &= ~INLINE_HINT_in_scc;
130 fprintf (f, " in_scc");
131 }
132 if (hints & INLINE_HINT_cross_module)
133 {
134 hints &= ~INLINE_HINT_cross_module;
135 fprintf (f, " cross_module");
136 }
137 if (hints & INLINE_HINT_declared_inline)
138 {
139 hints &= ~INLINE_HINT_declared_inline;
140 fprintf (f, " declared_inline");
141 }
142 if (hints & INLINE_HINT_known_hot)
143 {
144 hints &= ~INLINE_HINT_known_hot;
145 fprintf (f, " known_hot");
146 }
147 if (hints & INLINE_HINT_builtin_constant_p)
148 {
149 hints &= ~INLINE_HINT_builtin_constant_p;
150 fprintf (f, " builtin_constant_p");
151 }
152 gcc_assert (!hints);
153 }
154
155
/* Record SIZE and TIME to SUMMARY.
   The accounted code will be executed when EXEC_PRED is true.
   When NONCONST_PRED is false the code will evaluate to constant and
   will get optimized out in specialized clones of the function.
   If CALL is true account to call_size_time_table rather than
   size_time_table.  */

void
ipa_fn_summary::account_size_time (int size, sreal time,
				   const ipa_predicate &exec_pred,
				   const ipa_predicate &nonconst_pred_in,
				   bool call)
{
  size_time_entry *e;
  bool found = false;
  int i;
  ipa_predicate nonconst_pred;
  /* CALL selects which of the two tables receives the entry.  */
  vec<size_time_entry> *table = call ? &call_size_time_table : &size_time_table;

  /* Code that provably never executes needs no accounting.  */
  if (exec_pred == false)
    return;

  /* Code can only be constant-folded if it is executed at all, so conjoin
     the non-constness predicate with the execution predicate.  */
  nonconst_pred = nonconst_pred_in & exec_pred;

  if (nonconst_pred == false)
    return;

  /* We need to create initial empty unconditional clause, but otherwise
     we don't need to account empty times and sizes.  */
  if (!size && time == 0 && table->length ())
    return;

  /* Only for calls we are unaccounting what we previously recorded.  */
  gcc_checking_assert (time >= 0 || call);

  /* Look for an existing entry guarded by exactly the same predicates so
     the new cost can be merged into it.  */
  for (i = 0; table->iterate (i, &e); i++)
    if (e->exec_predicate == exec_pred
	&& e->nonconst_predicate == nonconst_pred)
      {
	found = true;
	break;
      }
  /* Once the table is full, collapse further costs into the first
     (unconditional) entry instead of growing it.  */
  if (i == max_size_time_table_size)
    {
      i = 0;
      found = true;
      e = &(*table)[0];
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "\t\tReached limit on number of entries, "
		 "ignoring the predicate.");
    }
  if (dump_file && (dump_flags & TDF_DETAILS) && (time != 0 || size))
    {
      fprintf (dump_file,
	       "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
	       ((double) size) / ipa_fn_summary::size_scale,
	       (time.to_double ()), found ? "" : "new ");
      exec_pred.dump (dump_file, conds, 0);
      if (exec_pred != nonconst_pred)
	{
	  fprintf (dump_file, " nonconst:");
	  nonconst_pred.dump (dump_file, conds);
	}
      else
	fprintf (dump_file, "\n");
    }
  if (!found)
    {
      class size_time_entry new_entry;
      new_entry.size = size;
      new_entry.time = time;
      new_entry.exec_predicate = exec_pred;
      new_entry.nonconst_predicate = nonconst_pred;
      if (call)
	call_size_time_table.safe_push (new_entry);
      else
	size_time_table.safe_push (new_entry);
    }
  else
    {
      /* Merge into the existing entry with identical predicates.  */
      e->size += size;
      e->time += time;
      /* FIXME: PR bootstrap/92653 gcc_checking_assert (e->time >= -1); */
      /* Tolerate small roundoff issues.  */
      if (e->time < 0)
	e->time = 0;
    }
}
245
246 /* We proved E to be unreachable, redirect it to __builtin_unreachable. */
247
248 static struct cgraph_edge *
redirect_to_unreachable(struct cgraph_edge * e)249 redirect_to_unreachable (struct cgraph_edge *e)
250 {
251 struct cgraph_node *callee = !e->inline_failed ? e->callee : NULL;
252 struct cgraph_node *target = cgraph_node::get_create
253 (builtin_decl_implicit (BUILT_IN_UNREACHABLE));
254
255 if (e->speculative)
256 e = cgraph_edge::resolve_speculation (e, target->decl);
257 else if (!e->callee)
258 e = cgraph_edge::make_direct (e, target);
259 else
260 e->redirect_callee (target);
261 class ipa_call_summary *es = ipa_call_summaries->get (e);
262 e->inline_failed = CIF_UNREACHABLE;
263 e->count = profile_count::zero ();
264 es->call_stmt_size = 0;
265 es->call_stmt_time = 0;
266 if (callee)
267 callee->remove_symbol_and_inline_clones ();
268 return e;
269 }
270
271 /* Set predicate for edge E. */
272
273 static void
edge_set_predicate(struct cgraph_edge * e,ipa_predicate * predicate)274 edge_set_predicate (struct cgraph_edge *e, ipa_predicate *predicate)
275 {
276 /* If the edge is determined to be never executed, redirect it
277 to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will
278 be optimized out. */
279 if (predicate && *predicate == false
280 /* When handling speculative edges, we need to do the redirection
281 just once. Do it always on the direct edge, so we do not
282 attempt to resolve speculation while duplicating the edge. */
283 && (!e->speculative || e->callee))
284 e = redirect_to_unreachable (e);
285
286 class ipa_call_summary *es = ipa_call_summaries->get (e);
287 if (predicate && *predicate != true)
288 {
289 if (!es->predicate)
290 es->predicate = edge_predicate_pool.allocate ();
291 *es->predicate = *predicate;
292 }
293 else
294 {
295 if (es->predicate)
296 edge_predicate_pool.remove (es->predicate);
297 es->predicate = NULL;
298 }
299 }
300
301 /* Set predicate for hint *P. */
302
303 static void
set_hint_predicate(ipa_predicate ** p,ipa_predicate new_predicate)304 set_hint_predicate (ipa_predicate **p, ipa_predicate new_predicate)
305 {
306 if (new_predicate == false || new_predicate == true)
307 {
308 if (*p)
309 edge_predicate_pool.remove (*p);
310 *p = NULL;
311 }
312 else
313 {
314 if (!*p)
315 *p = edge_predicate_pool.allocate ();
316 **p = new_predicate;
317 }
318 }
319
320 /* Find if NEW_PREDICATE is already in V and if so, increment its freq.
321 Otherwise add a new item to the vector with this predicate and frerq equal
322 to add_freq, unless the number of predicates would exceed MAX_NUM_PREDICATES
323 in which case the function does nothing. */
324
325 static void
add_freqcounting_predicate(vec<ipa_freqcounting_predicate,va_gc> ** v,const ipa_predicate & new_predicate,sreal add_freq,unsigned max_num_predicates)326 add_freqcounting_predicate (vec<ipa_freqcounting_predicate, va_gc> **v,
327 const ipa_predicate &new_predicate, sreal add_freq,
328 unsigned max_num_predicates)
329 {
330 if (new_predicate == false || new_predicate == true)
331 return;
332 ipa_freqcounting_predicate *f;
333 for (int i = 0; vec_safe_iterate (*v, i, &f); i++)
334 if (new_predicate == f->predicate)
335 {
336 f->freq += add_freq;
337 return;
338 }
339 if (vec_safe_length (*v) >= max_num_predicates)
340 /* Too many different predicates to account for. */
341 return;
342
343 ipa_freqcounting_predicate fcp;
344 fcp.predicate = NULL;
345 set_hint_predicate (&fcp.predicate, new_predicate);
346 fcp.freq = add_freq;
347 vec_safe_push (*v, fcp);
348 return;
349 }
350
/* Compute what conditions may or may not hold given information about
   parameters.  RET_CLAUSE returns truths that may hold in a specialized copy,
   while RET_NONSPEC_CLAUSE returns truths that may hold in an nonspecialized
   copy when called in a given context.  It is a bitmask of conditions.  Bit
   0 means that condition is known to be false, while bit 1 means that condition
   may or may not be true.  These differs - for example NOT_INLINED condition
   is always false in the second and also builtin_constant_p tests cannot use
   the fact that parameter is indeed a constant.

   When INLINE_P is true, assume that we are inlining.  AVAL contains known
   information about argument values.  The function does not modify its content
   and so AVALs could also be of type ipa_call_arg_values but so far all
   callers work with the auto version and so we avoid the conversion for
   convenience.

   ERROR_MARK value of an argument means compile time invariant.  */

static void
evaluate_conditions_for_known_args (struct cgraph_node *node,
				    bool inline_p,
				    ipa_auto_call_arg_values *avals,
				    clause_t *ret_clause,
				    clause_t *ret_nonspec_clause)
{
  /* NOT_INLINED holds in the non-specialized copy; in the specialized copy
     it holds only when we are not inlining.  */
  clause_t clause = inline_p ? 0 : 1 << ipa_predicate::not_inlined_condition;
  clause_t nonspec_clause = 1 << ipa_predicate::not_inlined_condition;
  class ipa_fn_summary *info = ipa_fn_summaries->get (node);
  int i;
  struct condition *c;

  /* Evaluate each condition of NODE's summary against the known argument
     values; a set bit in the result means the condition may hold.  */
  for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
    {
      tree val = NULL;
      tree res;
      int j;
      struct expr_eval_op *op;

      /* We allow call stmt to have fewer arguments than the callee function
	 (especially for K&R style programs).  So bound check here (we assume
	 m_known_aggs vector is either empty or has the same length as
	 m_known_vals).  */
      gcc_checking_assert (!avals->m_known_aggs.length ()
			   || !avals->m_known_vals.length ()
			   || (avals->m_known_vals.length ()
			       == avals->m_known_aggs.length ()));

      if (c->agg_contents)
	{
	  /* error_mark_node stands for a compile-time-invariant argument;
	     a 'changed' test on its pointed-to contents cannot fire.  */
	  if (c->code == ipa_predicate::changed
	      && !c->by_ref
	      && (avals->safe_sval_at(c->operand_num) == error_mark_node))
	    continue;

	  /* Look the constant up inside the known aggregate contents.  */
	  if (ipa_agg_value_set *agg = avals->safe_aggval_at (c->operand_num))
	    {
	      tree sval = avals->safe_sval_at (c->operand_num);
	      val = ipa_find_agg_cst_for_param (agg, sval, c->offset,
						c->by_ref);
	    }
	  else
	    val = NULL_TREE;
	}
      else
	{
	  val = avals->safe_sval_at (c->operand_num);
	  /* The invariant marker is only meaningful for 'changed' tests.  */
	  if (val && val == error_mark_node
	      && c->code != ipa_predicate::changed)
	    val = NULL_TREE;
	}

      /* Nothing is known about the value: 'changed' and 'is_not_constant'
	 must conservatively be assumed to hold in both copies.  */
      if (!val
	  && (c->code == ipa_predicate::changed
	      || c->code == ipa_predicate::is_not_constant))
	{
	  clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
	  nonspec_clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
	  continue;
	}
      /* The value is known, so in the specialized copy the parameter is not
	 'changed'; the non-specialized copy cannot rely on that.  */
      if (c->code == ipa_predicate::changed)
	{
	  nonspec_clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
	  continue;
	}

      /* Likewise, builtin_constant_p may only succeed in the specialized
	 copy even though the value is known here.  */
      if (c->code == ipa_predicate::is_not_constant)
	{
	  nonspec_clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
	  continue;
	}

      /* Try to fold the condition to a constant using the known value.  */
      if (val && TYPE_SIZE (c->type) == TYPE_SIZE (TREE_TYPE (val)))
	{
	  if (c->type != TREE_TYPE (val))
	    val = fold_unary (VIEW_CONVERT_EXPR, c->type, val);
	  /* Apply the side-expression operations recorded with the
	     condition; OP->index says which operand VAL feeds.  */
	  for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
	    {
	      if (!val)
		break;
	      if (!op->val[0])
		val = fold_unary (op->code, op->type, val);
	      else if (!op->val[1])
		val = fold_binary (op->code, op->type,
				   op->index ? op->val[0] : val,
				   op->index ? val : op->val[0]);
	      else if (op->index == 0)
		val = fold_ternary (op->code, op->type,
				    val, op->val[0], op->val[1]);
	      else if (op->index == 1)
		val = fold_ternary (op->code, op->type,
				    op->val[0], val, op->val[1]);
	      else if (op->index == 2)
		val = fold_ternary (op->code, op->type,
				    op->val[0], op->val[1], val);
	      else
		val = NULL_TREE;
	    }

	  res = val
	    ? fold_binary_to_constant (c->code, boolean_type_node, val, c->val)
	    : NULL;

	  /* Folded to false: condition is known not to hold anywhere.  */
	  if (res && integer_zerop (res))
	    continue;
	  /* Folded to true: condition holds in both copies.  */
	  if (res && integer_onep (res))
	    {
	      clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
	      nonspec_clause
		|= 1 << (i + ipa_predicate::first_dynamic_condition);
	      continue;
	    }
	}
      /* Folding to a constant failed; try to decide the condition from a
	 known value range of the argument.  */
      if (c->operand_num < (int) avals->m_known_value_ranges.length ()
	  && !c->agg_contents
	  && (!val || TREE_CODE (val) != INTEGER_CST))
	{
	  value_range vr = avals->m_known_value_ranges[c->operand_num];
	  if (!vr.undefined_p ()
	      && !vr.varying_p ()
	      && (TYPE_SIZE (c->type) == TYPE_SIZE (vr.type ())))
	    {
	      if (!useless_type_conversion_p (c->type, vr.type ()))
		{
		  value_range res;
		  range_fold_unary_expr (&res, NOP_EXPR,
					 c->type, &vr, vr.type ());
		  vr = res;
		}
	      tree type = c->type;

	      /* Propagate the range through the recorded side
		 expressions, mirroring the fold loop above.  */
	      for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
		{
		  if (vr.varying_p () || vr.undefined_p ())
		    break;

		  value_range res;
		  if (!op->val[0])
		    range_fold_unary_expr (&res, op->code, op->type, &vr, type);
		  else if (!op->val[1])
		    {
		      value_range op0 (op->val[0], op->val[0]);
		      range_fold_binary_expr (&res, op->code, op->type,
					      op->index ? &op0 : &vr,
					      op->index ? &vr : &op0);
		    }
		  else
		    gcc_unreachable ();
		  type = op->type;
		  vr = res;
		}
	      if (!vr.varying_p () && !vr.undefined_p ())
		{
		  value_range res;
		  value_range val_vr (c->val, c->val);
		  range_fold_binary_expr (&res, c->code, boolean_type_node,
					  &vr,
					  &val_vr);
		  /* Condition proved false on the whole range.  */
		  if (res.zero_p ())
		    continue;
		}
	    }
	}

      /* Could not prove the condition false; assume it may hold in both
	 the specialized and the non-specialized copy.  */
      clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
      nonspec_clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
    }
  *ret_clause = clause;
  if (ret_nonspec_clause)
    *ret_nonspec_clause = nonspec_clause;
}
540
541 /* Return true if VRP will be exectued on the function.
542 We do not want to anticipate optimizations that will not happen.
543
544 FIXME: This can be confused with -fdisable and debug counters and thus
545 it should not be used for correctness (only to make heuristics work).
546 This means that inliner should do its own optimizations of expressions
547 that it predicts to be constant so wrong code can not be triggered by
548 builtin_constant_p. */
549
550 static bool
vrp_will_run_p(struct cgraph_node * node)551 vrp_will_run_p (struct cgraph_node *node)
552 {
553 return (opt_for_fn (node->decl, optimize)
554 && !opt_for_fn (node->decl, optimize_debug)
555 && opt_for_fn (node->decl, flag_tree_vrp));
556 }
557
558 /* Similarly about FRE. */
559
560 static bool
fre_will_run_p(struct cgraph_node * node)561 fre_will_run_p (struct cgraph_node *node)
562 {
563 return (opt_for_fn (node->decl, optimize)
564 && !opt_for_fn (node->decl, optimize_debug)
565 && opt_for_fn (node->decl, flag_tree_fre));
566 }
567
/* Work out what conditions might be true at invocation of E.
   Compute costs for inlined edge if INLINE_P is true.

   Return in CLAUSE_PTR the evaluated conditions and in NONSPEC_CLAUSE_PTR
   (if non-NULL) conditions evaluated for nonspecialized clone called
   in a given context.

   Vectors in AVALS will be populated with useful known information about
   argument values - information not known to have any uses will be omitted -
   except for m_known_contexts which will only be calculated if
   COMPUTE_CONTEXTS is true.  */

void
evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p,
			      clause_t *clause_ptr,
			      clause_t *nonspec_clause_ptr,
			      ipa_auto_call_arg_values *avals,
			      bool compute_contexts)
{
  struct cgraph_node *callee = e->callee->ultimate_alias_target ();
  class ipa_fn_summary *info = ipa_fn_summaries->get (callee);
  class ipa_edge_args *args;

  if (clause_ptr)
    *clause_ptr = inline_p ? 0 : 1 << ipa_predicate::not_inlined_condition;

  /* Preferred path: use jump functions from IPA propagation when they are
     available for this edge.  */
  if (ipa_node_params_sum
      && !e->call_stmt_cannot_inline_p
      && (info->conds || compute_contexts)
      && (args = ipa_edge_args_sum->get (e)) != NULL)
    {
      struct cgraph_node *caller;
      class ipa_node_params *caller_parms_info, *callee_pi = NULL;
      class ipa_call_summary *es = ipa_call_summaries->get (e);
      int i, count = ipa_get_cs_argument_count (args);

      if (count)
	{
	  /* Jump functions of an already-inlined caller are expressed in
	     terms of the node it was inlined into.  */
	  if (e->caller->inlined_to)
	    caller = e->caller->inlined_to;
	  else
	    caller = e->caller;
	  caller_parms_info = ipa_node_params_sum->get (caller);
	  callee_pi = ipa_node_params_sum->get (callee);

	  /* Watch for thunks.  */
	  if (callee_pi)
	    /* Watch for variadic functions.  */
	    count = MIN (count, ipa_get_param_count (callee_pi));
	}

      if (callee_pi)
	for (i = 0; i < count; i++)
	  {
	    struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i);

	    /* Only collect values for parameters the callee's summary
	       actually consults.  */
	    if (ipa_is_param_used_by_indirect_call (callee_pi, i)
		|| ipa_is_param_used_by_ipa_predicates (callee_pi, i))
	      {
		/* Determine if we know constant value of the parameter.  */
		tree cst = ipa_value_from_jfunc (caller_parms_info, jf,
						 ipa_get_type (callee_pi, i));

		/* Fall back to the argument of the call statement itself.  */
		if (!cst && e->call_stmt
		    && i < (int)gimple_call_num_args (e->call_stmt))
		  {
		    cst = gimple_call_arg (e->call_stmt, i);
		    if (!is_gimple_min_invariant (cst))
		      cst = NULL;
		  }
		if (cst)
		  {
		    gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO);
		    if (!avals->m_known_vals.length ())
		      avals->m_known_vals.safe_grow_cleared (count, true);
		    avals->m_known_vals[i] = cst;
		  }
		/* error_mark_node marks the value as compile time invariant
		   even though the constant itself is unknown; see
		   evaluate_conditions_for_known_args.  */
		else if (inline_p && !es->param[i].change_prob)
		  {
		    if (!avals->m_known_vals.length ())
		      avals->m_known_vals.safe_grow_cleared (count, true);
		    avals->m_known_vals[i] = error_mark_node;
		  }

		/* If we failed to get simple constant, try value range.  */
		if ((!cst || TREE_CODE (cst) != INTEGER_CST)
		    && vrp_will_run_p (caller)
		    && ipa_is_param_used_by_ipa_predicates (callee_pi, i))
		  {
		    value_range vr
		       = ipa_value_range_from_jfunc (caller_parms_info, e, jf,
						     ipa_get_type (callee_pi,
								   i));
		    if (!vr.undefined_p () && !vr.varying_p ())
		      {
			if (!avals->m_known_value_ranges.length ())
			  {
			    /* safe_grow does not run constructors, so
			       placement-new each slot before use.  */
			    avals->m_known_value_ranges.safe_grow (count, true);
			    for (int i = 0; i < count; ++i)
			      new (&avals->m_known_value_ranges[i])
				value_range ();
			  }
			avals->m_known_value_ranges[i] = vr;
		      }
		  }

		/* Determine known aggregate values.  */
		if (fre_will_run_p (caller))
		  {
		    ipa_agg_value_set agg
			= ipa_agg_value_set_from_jfunc (caller_parms_info,
							caller, &jf->agg);
		    if (agg.items.length ())
		      {
			if (!avals->m_known_aggs.length ())
			  avals->m_known_aggs.safe_grow_cleared (count, true);
			avals->m_known_aggs[i] = agg;
		      }
		  }
	      }

	    /* For calls used in polymorphic calls we further determine
	       polymorphic call context.  */
	    if (compute_contexts
		&& ipa_is_param_used_by_polymorphic_call (callee_pi, i))
	      {
		ipa_polymorphic_call_context
		   ctx = ipa_context_from_jfunc (caller_parms_info, e, i, jf);
		if (!ctx.useless_p ())
		  {
		    if (!avals->m_known_contexts.length ())
		      avals->m_known_contexts.safe_grow_cleared (count, true);
		    avals->m_known_contexts[i]
		      = ipa_context_from_jfunc (caller_parms_info, e, i, jf);
		  }
	      }
	  }
      else
	gcc_assert (!count || callee->thunk);
    }
  /* Without jump functions, harvest invariant arguments directly from the
     call statement.  */
  else if (e->call_stmt && !e->call_stmt_cannot_inline_p && info->conds)
    {
      int i, count = (int)gimple_call_num_args (e->call_stmt);

      for (i = 0; i < count; i++)
	{
	  tree cst = gimple_call_arg (e->call_stmt, i);
	  if (!is_gimple_min_invariant (cst))
	    cst = NULL;
	  if (cst)
	    {
	      if (!avals->m_known_vals.length ())
		avals->m_known_vals.safe_grow_cleared (count, true);
	      avals->m_known_vals[i] = cst;
	    }
	}
    }

  /* Finally evaluate the callee's summary conditions on the gathered
     argument information.  */
  evaluate_conditions_for_known_args (callee, inline_p, avals, clause_ptr,
				      nonspec_clause_ptr);
}
729
730
/* Allocate the function summary.  Creates the three global summary maps:
   size summaries and call summaries on the heap, function summaries in
   GGC memory (via create_ggc).  Must only be called once per compilation
   (asserted via ipa_fn_summaries being NULL).  */

static void
ipa_fn_summary_alloc (void)
{
  gcc_checking_assert (!ipa_fn_summaries);
  /* NOTE(review): summaries register hooks with SYMTAB on construction;
     the creation order here is kept as-is in case hook ordering matters.  */
  ipa_size_summaries = new ipa_size_summary_t (symtab);
  ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab);
  ipa_call_summaries = new ipa_call_summary_t (symtab);
}
741
~ipa_call_summary()742 ipa_call_summary::~ipa_call_summary ()
743 {
744 if (predicate)
745 edge_predicate_pool.remove (predicate);
746
747 param.release ();
748 }
749
~ipa_fn_summary()750 ipa_fn_summary::~ipa_fn_summary ()
751 {
752 unsigned len = vec_safe_length (loop_iterations);
753 for (unsigned i = 0; i < len; i++)
754 edge_predicate_pool.remove ((*loop_iterations)[i].predicate);
755 len = vec_safe_length (loop_strides);
756 for (unsigned i = 0; i < len; i++)
757 edge_predicate_pool.remove ((*loop_strides)[i].predicate);
758 vec_free (conds);
759 call_size_time_table.release ();
760 vec_free (loop_iterations);
761 vec_free (loop_strides);
762 builtin_constant_p_parms.release ();
763 }
764
765 void
remove_callees(cgraph_node * node)766 ipa_fn_summary_t::remove_callees (cgraph_node *node)
767 {
768 cgraph_edge *e;
769 for (e = node->callees; e; e = e->next_callee)
770 ipa_call_summaries->remove (e);
771 for (e = node->indirect_calls; e; e = e->next_callee)
772 ipa_call_summaries->remove (e);
773 }
774
775 /* Duplicate predicates in loop hint vector, allocating memory for them and
776 remove and deallocate any uninteresting (true or false) ones. Return the
777 result. */
778
779 static vec<ipa_freqcounting_predicate, va_gc> *
remap_freqcounting_preds_after_dup(vec<ipa_freqcounting_predicate,va_gc> * v,clause_t possible_truths)780 remap_freqcounting_preds_after_dup (vec<ipa_freqcounting_predicate, va_gc> *v,
781 clause_t possible_truths)
782 {
783 if (vec_safe_length (v) == 0)
784 return NULL;
785
786 vec<ipa_freqcounting_predicate, va_gc> *res = v->copy ();
787 int len = res->length();
788 for (int i = len - 1; i >= 0; i--)
789 {
790 ipa_predicate new_predicate
791 = (*res)[i].predicate->remap_after_duplication (possible_truths);
792 /* We do not want to free previous predicate; it is used by node
793 origin. */
794 (*res)[i].predicate = NULL;
795 set_hint_predicate (&(*res)[i].predicate, new_predicate);
796
797 if (!(*res)[i].predicate)
798 res->unordered_remove (i);
799 }
800
801 return res;
802 }
803
804
/* Hook that is called by cgraph.c when a node is duplicated.  SRC/DST are
   the original and cloned nodes; SRC_INFO/INFO the corresponding function
   summaries (INFO is raw storage that this hook must construct).  */
void
ipa_fn_summary_t::duplicate (cgraph_node *src,
			     cgraph_node *dst,
			     ipa_fn_summary *src_info,
			     ipa_fn_summary *info)
{
  /* Start from a copy of the source summary, then deep-copy what must not
     be shared.  */
  new (info) ipa_fn_summary (*src_info);
  /* TODO: as an optimization, we may avoid copying conditions
     that are known to be false or true.  */
  info->conds = vec_safe_copy (info->conds);

  clone_info *cinfo = clone_info::get (dst);
  /* When there are any replacements in the function body, see if we can figure
     out that something was optimized out.  */
  if (ipa_node_params_sum && cinfo && cinfo->tree_map)
    {
      /* Use SRC parm info since it may not be copied yet.  */
      ipa_node_params *parms_info = ipa_node_params_sum->get (src);
      ipa_auto_call_arg_values avals;
      int count = ipa_get_param_count (parms_info);
      int i, j;
      clause_t possible_truths;
      ipa_predicate true_pred = true;
      size_time_entry *e;
      int optimized_out_size = 0;
      bool inlined_to_p = false;
      struct cgraph_edge *edge, *next;

      info->size_time_table.release ();
      avals.m_known_vals.safe_grow_cleared (count, true);
      /* Collect the replacement value (if any) for each parameter.  */
      for (i = 0; i < count; i++)
	{
	  struct ipa_replace_map *r;

	  for (j = 0; vec_safe_iterate (cinfo->tree_map, j, &r); j++)
	    {
	      if (r->parm_num == i)
		{
		  avals.m_known_vals[i] = r->new_tree;
		  break;
		}
	    }
	}
      /* Work out which conditions may still hold in the clone given the
	 replaced parameter values.  */
      evaluate_conditions_for_known_args (dst, false,
					  &avals,
					  &possible_truths,
					  /* We are going to specialize,
					     so ignore nonspec truths.  */
					  NULL);

      /* Re-create the initial unconditional size/time entry.  */
      info->account_size_time (0, 0, true_pred, true_pred);

      /* Remap size_time vectors.
	 Simplify the predicate by pruning out alternatives that are known
	 to be false.
	 TODO: as on optimization, we can also eliminate conditions known
	 to be true.  */
      for (i = 0; src_info->size_time_table.iterate (i, &e); i++)
	{
	  ipa_predicate new_exec_pred;
	  ipa_predicate new_nonconst_pred;
	  new_exec_pred = e->exec_predicate.remap_after_duplication
				 (possible_truths);
	  new_nonconst_pred = e->nonconst_predicate.remap_after_duplication
				 (possible_truths);
	  /* Entries proved unreachable in the clone count as savings.  */
	  if (new_exec_pred == false || new_nonconst_pred == false)
	    optimized_out_size += e->size;
	  else
	    info->account_size_time (e->size, e->time, new_exec_pred,
				     new_nonconst_pred);
	}

      /* Remap edge predicates with the same simplification as above.
	 Also copy constantness arrays.  */
      for (edge = dst->callees; edge; edge = next)
	{
	  ipa_predicate new_predicate;
	  class ipa_call_summary *es = ipa_call_summaries->get (edge);
	  /* edge_set_predicate may replace the edge; grab NEXT first.  */
	  next = edge->next_callee;

	  if (!edge->inline_failed)
	    inlined_to_p = true;
	  if (!es->predicate)
	    continue;
	  new_predicate = es->predicate->remap_after_duplication
	    (possible_truths);
	  if (new_predicate == false && *es->predicate != false)
	    optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
	  edge_set_predicate (edge, &new_predicate);
	}

      /* Remap indirect edge predicates with the same simplification as above.
	 Also copy constantness arrays.  */
      for (edge = dst->indirect_calls; edge; edge = next)
	{
	  ipa_predicate new_predicate;
	  class ipa_call_summary *es = ipa_call_summaries->get (edge);
	  next = edge->next_callee;

	  gcc_checking_assert (edge->inline_failed);
	  if (!es->predicate)
	    continue;
	  new_predicate = es->predicate->remap_after_duplication
	    (possible_truths);
	  if (new_predicate == false && *es->predicate != false)
	    optimized_out_size
	      += es->call_stmt_size * ipa_fn_summary::size_scale;
	  edge_set_predicate (edge, &new_predicate);
	}
      /* Remap loop hint predicates likewise.  */
      info->loop_iterations
	= remap_freqcounting_preds_after_dup (info->loop_iterations,
					      possible_truths);
      info->loop_strides
	= remap_freqcounting_preds_after_dup (info->loop_strides,
					      possible_truths);
      /* Keep builtin_constant_p parameters only when their value is still
	 unknown in the clone.  */
      if (info->builtin_constant_p_parms.length())
	{
	  vec <int, va_heap, vl_ptr> parms = info->builtin_constant_p_parms;
	  int ip;
	  info->builtin_constant_p_parms = vNULL;
	  for (i = 0; parms.iterate (i, &ip); i++)
	    if (!avals.m_known_vals[ip])
	      info->builtin_constant_p_parms.safe_push (ip);
	}

      /* If inliner or someone after inliner will ever start producing
	 non-trivial clones, we will get trouble with lack of information
	 about updating self sizes, because size vectors already contains
	 sizes of the callees.  */
      gcc_assert (!inlined_to_p || !optimized_out_size);
    }
  else
    {
      /* No replacements: just deep-copy the tables that would otherwise be
	 shared with the origin node.  */
      info->size_time_table = src_info->size_time_table.copy ();
      info->loop_iterations = vec_safe_copy (src_info->loop_iterations);
      info->loop_strides = vec_safe_copy (info->loop_strides);

      info->builtin_constant_p_parms
	     = info->builtin_constant_p_parms.copy ();

      /* Give the copied loop hints their own pool-allocated predicates.  */
      ipa_freqcounting_predicate *f;
      for (int i = 0; vec_safe_iterate (info->loop_iterations, i, &f); i++)
	{
	  ipa_predicate p = *f->predicate;
	  f->predicate = NULL;
	  set_hint_predicate (&f->predicate, p);
	}
      for (int i = 0; vec_safe_iterate (info->loop_strides, i, &f); i++)
	{
	  ipa_predicate p = *f->predicate;
	  f->predicate = NULL;
	  set_hint_predicate (&f->predicate, p);
	}
    }
  if (!dst->inlined_to)
    ipa_update_overall_fn_summary (dst);
}
963
964
965 /* Hook that is called by cgraph.c when a node is duplicated. */
966
967 void
duplicate(struct cgraph_edge * src,struct cgraph_edge * dst,class ipa_call_summary * srcinfo,class ipa_call_summary * info)968 ipa_call_summary_t::duplicate (struct cgraph_edge *src,
969 struct cgraph_edge *dst,
970 class ipa_call_summary *srcinfo,
971 class ipa_call_summary *info)
972 {
973 new (info) ipa_call_summary (*srcinfo);
974 info->predicate = NULL;
975 edge_set_predicate (dst, srcinfo->predicate);
976 info->param = srcinfo->param.copy ();
977 if (!dst->indirect_unknown_callee && src->indirect_unknown_callee)
978 {
979 info->call_stmt_size -= (eni_size_weights.indirect_call_cost
980 - eni_size_weights.call_cost);
981 info->call_stmt_time -= (eni_time_weights.indirect_call_cost
982 - eni_time_weights.call_cost);
983 }
984 }
985
986 /* Dump edge summaries associated to NODE and recursively to all clones.
987 Indent by INDENT. */
988
static void
dump_ipa_call_summary (FILE *f, int indent, struct cgraph_node *node,
		       class ipa_fn_summary *info)
{
  struct cgraph_edge *edge;

  /* Direct calls first: one entry per callee edge, recursing into the
     bodies of edges that have been inlined.  */
  for (edge = node->callees; edge; edge = edge->next_callee)
    {
      class ipa_call_summary *es = ipa_call_summaries->get (edge);
      /* Aliases are reported through their ultimate target.  */
      struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
      int i;

      fprintf (f,
	       "%*s%s %s\n%*s freq:%4.2f",
	       indent, "", callee->dump_name (),
	       !edge->inline_failed
	       ? "inlined" : cgraph_inline_failed_string (edge-> inline_failed),
	       indent, "", edge->sreal_frequency ().to_double ());

      if (cross_module_call_p (edge))
	fprintf (f, " cross module");

      /* The call summary may be missing; guard each use of ES.  */
      if (es)
	fprintf (f, " loop depth:%2i size:%2i time: %2i",
		 es->loop_depth, es->call_stmt_size, es->call_stmt_time);

      ipa_fn_summary *s = ipa_fn_summaries->get (callee);
      ipa_size_summary *ss = ipa_size_summaries->get (callee);
      if (s != NULL)
	fprintf (f, " callee size:%2i stack:%2i",
		 (int) (ss->size / ipa_fn_summary::size_scale),
		 (int) s->estimated_stack_size);

      if (es && es->predicate)
	{
	  fprintf (f, " predicate: ");
	  es->predicate->dump (f, info->conds);
	}
      else
	fprintf (f, "\n");
      /* Per-parameter information: change probability and whether the
	 argument points to local or readonly memory.  */
      if (es && es->param.exists ())
	for (i = 0; i < (int) es->param.length (); i++)
	  {
	    int prob = es->param[i].change_prob;

	    if (!prob)
	      fprintf (f, "%*s op%i is compile time invariant\n",
		       indent + 2, "", i);
	    else if (prob != REG_BR_PROB_BASE)
	      fprintf (f, "%*s op%i change %f%% of time\n", indent + 2, "", i,
		       prob * 100.0 / REG_BR_PROB_BASE);
	    if (es->param[i].points_to_local_or_readonly_memory)
	      fprintf (f, "%*s op%i points to local or readonly memory\n",
		       indent + 2, "", i);
	  }
      /* For inlined edges, dump the callee's own edges one indent level
	 deeper.  */
      if (!edge->inline_failed)
	{
	  ipa_size_summary *ss = ipa_size_summaries->get (callee);
	  fprintf (f, "%*sStack frame offset %i, callee self size %i\n",
		   indent + 2, "",
		   (int) ipa_get_stack_frame_offset (callee),
		   (int) ss->estimated_self_stack_size);
	  dump_ipa_call_summary (f, indent + 2, callee, info);
	}
    }
  /* Indirect calls: no known callee, so dump only the edge summary.  */
  for (edge = node->indirect_calls; edge; edge = edge->next_callee)
    {
      class ipa_call_summary *es = ipa_call_summaries->get (edge);
      fprintf (f, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i"
	       " time: %2i",
	       indent, "",
	       es->loop_depth,
	       edge->sreal_frequency ().to_double (), es->call_stmt_size,
	       es->call_stmt_time);
      if (es->predicate)
	{
	  fprintf (f, "predicate: ");
	  es->predicate->dump (f, info->conds);
	}
      else
	fprintf (f, "\n");
    }
}
1071
1072
/* Dump the function summary and size summary of NODE to file F.  */

void
ipa_dump_fn_summary (FILE *f, struct cgraph_node *node)
{
  /* Only defined functions have summaries worth dumping.  */
  if (node->definition)
    {
      class ipa_fn_summary *s = ipa_fn_summaries->get (node);
      class ipa_size_summary *ss = ipa_size_summaries->get (node);
      if (s != NULL)
	{
	  size_time_entry *e;
	  int i;
	  fprintf (f, "IPA function summary for %s", node->dump_name ());
	  /* One-word flags first.  */
	  if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
	    fprintf (f, " always_inline");
	  if (s->inlinable)
	    fprintf (f, " inlinable");
	  if (s->fp_expressions)
	    fprintf (f, " fp_expression");
	  /* Parameters tested by __builtin_constant_p, if any.  */
	  if (s->builtin_constant_p_parms.length ())
	    {
	      fprintf (f, " builtin_constant_p_parms");
	      for (unsigned int i = 0;
		   i < s->builtin_constant_p_parms.length (); i++)
		fprintf (f, " %i", s->builtin_constant_p_parms[i]);
	    }
	  /* Global vs. self metrics: "global" accounts for inlining
	     decisions already present in the callgraph.  */
	  fprintf (f, "\n global time: %f\n", s->time.to_double ());
	  fprintf (f, " self size: %i\n", ss->self_size);
	  fprintf (f, " global size: %i\n", ss->size);
	  fprintf (f, " min size: %i\n", s->min_size);
	  fprintf (f, " self stack: %i\n",
		   (int) ss->estimated_self_stack_size);
	  fprintf (f, " global stack: %i\n", (int) s->estimated_stack_size);
	  if (s->growth)
	    fprintf (f, " estimated growth:%i\n", (int) s->growth);
	  if (s->scc_no)
	    fprintf (f, " In SCC: %i\n", (int) s->scc_no);
	  /* Size/time entries, each guarded by the predicates under which
	     they are executed resp. not compile-time constant.  */
	  for (i = 0; s->size_time_table.iterate (i, &e); i++)
	    {
	      fprintf (f, " size:%f, time:%f",
		       (double) e->size / ipa_fn_summary::size_scale,
		       e->time.to_double ());
	      if (e->exec_predicate != true)
		{
		  fprintf (f, ", executed if:");
		  e->exec_predicate.dump (f, s->conds, 0);
		}
	      if (e->exec_predicate != e->nonconst_predicate)
		{
		  fprintf (f, ", nonconst if:");
		  e->nonconst_predicate.dump (f, s->conds, 0);
		}
	      fprintf (f, "\n");
	    }
	  /* Loop iteration and stride estimates with their predicates;
	     the heading is printed lazily before the first entry.  */
	  ipa_freqcounting_predicate *fcp;
	  bool first_fcp = true;
	  for (int i = 0; vec_safe_iterate (s->loop_iterations, i, &fcp); i++)
	    {
	      if (first_fcp)
		{
		  fprintf (f, " loop iterations:");
		  first_fcp = false;
		}
	      fprintf (f, " %3.2f for ", fcp->freq.to_double ());
	      fcp->predicate->dump (f, s->conds);
	    }
	  first_fcp = true;
	  for (int i = 0; vec_safe_iterate (s->loop_strides, i, &fcp); i++)
	    {
	      if (first_fcp)
		{
		  fprintf (f, " loop strides:");
		  first_fcp = false;
		}
	      fprintf (f, " %3.2f for :", fcp->freq.to_double ());
	      fcp->predicate->dump (f, s->conds);
	    }
	  fprintf (f, " calls:\n");
	  dump_ipa_call_summary (f, 4, node, s);
	  fprintf (f, "\n");
	}
      else
	fprintf (f, "IPA summary for %s is missing.\n", node->dump_name ());
    }
}
1157
/* Dump the function summary of NODE to stderr; convenience entry point
   intended to be called from the debugger.  */

DEBUG_FUNCTION void
ipa_debug_fn_summary (struct cgraph_node *node)
{
  ipa_dump_fn_summary (stderr, node);
}
1163
1164 void
ipa_dump_fn_summaries(FILE * f)1165 ipa_dump_fn_summaries (FILE *f)
1166 {
1167 struct cgraph_node *node;
1168
1169 FOR_EACH_DEFINED_FUNCTION (node)
1170 if (!node->inlined_to)
1171 ipa_dump_fn_summary (f, node);
1172 }
1173
1174 /* Callback of walk_aliased_vdefs. Flags that it has been invoked to the
1175 boolean variable pointed to by DATA. */
1176
1177 static bool
mark_modified(ao_ref * ao ATTRIBUTE_UNUSED,tree vdef ATTRIBUTE_UNUSED,void * data)1178 mark_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef ATTRIBUTE_UNUSED,
1179 void *data)
1180 {
1181 bool *b = (bool *) data;
1182 *b = true;
1183 return true;
1184 }
1185
1186 /* If OP refers to value of function parameter, return the corresponding
1187 parameter. If non-NULL, the size of the memory load (or the SSA_NAME of the
1188 PARM_DECL) will be stored to *SIZE_P in that case too. */
1189
static tree
unmodified_parm_1 (ipa_func_body_info *fbi, gimple *stmt, tree op,
		   poly_int64 *size_p)
{
  /* SSA_NAME referring to parm default def?  The default definition of a
     PARM_DECL is the parameter's incoming value, unmodified by construction.  */
  if (TREE_CODE (op) == SSA_NAME
      && SSA_NAME_IS_DEFAULT_DEF (op)
      && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL)
    {
      if (size_p)
	*size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op)));
      return SSA_NAME_VAR (op);
    }
  /* Non-SSA parm reference?  Only attempt the (costly) alias walk while
     some walking budget remains.  */
  if (TREE_CODE (op) == PARM_DECL
      && fbi->aa_walk_budget > 0)
    {
      bool modified = false;

      ao_ref refd;
      ao_ref_init (&refd, op);
      /* Walk the virtual use-def chain upwards from STMT looking for a
	 store that may alias the parameter; mark_modified sets MODIFIED
	 when one is found.  */
      int walked = walk_aliased_vdefs (&refd, gimple_vuse (stmt),
				       mark_modified, &modified, NULL, NULL,
				       fbi->aa_walk_budget);
      if (walked < 0)
	{
	  /* The walk was aborted because it exceeded the budget; disable
	     further alias walks for this function body.  */
	  fbi->aa_walk_budget = 0;
	  return NULL_TREE;
	}
      /* Charge the statements actually visited against the budget.  */
      fbi->aa_walk_budget -= walked;
      if (!modified)
	{
	  if (size_p)
	    *size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op)));
	  return op;
	}
    }
  return NULL_TREE;
}
1229
1230 /* If OP refers to value of function parameter, return the corresponding
1231 parameter. Also traverse chains of SSA register assignments. If non-NULL,
1232 the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
1233 stored to *SIZE_P in that case too. */
1234
1235 static tree
unmodified_parm(ipa_func_body_info * fbi,gimple * stmt,tree op,poly_int64 * size_p)1236 unmodified_parm (ipa_func_body_info *fbi, gimple *stmt, tree op,
1237 poly_int64 *size_p)
1238 {
1239 tree res = unmodified_parm_1 (fbi, stmt, op, size_p);
1240 if (res)
1241 return res;
1242
1243 if (TREE_CODE (op) == SSA_NAME
1244 && !SSA_NAME_IS_DEFAULT_DEF (op)
1245 && gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
1246 return unmodified_parm (fbi, SSA_NAME_DEF_STMT (op),
1247 gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op)),
1248 size_p);
1249 return NULL_TREE;
1250 }
1251
1252 /* If OP refers to a value of a function parameter or value loaded from an
1253 aggregate passed to a parameter (either by value or reference), return TRUE
1254 and store the number of the parameter to *INDEX_P, the access size into
1255 *SIZE_P, and information whether and how it has been loaded from an
1256 aggregate into *AGGPOS. INFO describes the function parameters, STMT is the
1257 statement in which OP is used or loaded. */
1258
static bool
unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi,
				  gimple *stmt, tree op, int *index_p,
				  poly_int64 *size_p,
				  struct agg_position_info *aggpos)
{
  /* First try OP as a direct (scalar) use of an unmodified parameter.  */
  tree res = unmodified_parm_1 (fbi, stmt, op, size_p);

  gcc_checking_assert (aggpos);
  if (res)
    {
      *index_p = ipa_get_param_decl_index (fbi->info, res);
      /* The decl may not be a tracked parameter of this function.  */
      if (*index_p < 0)
	return false;
      aggpos->agg_contents = false;
      aggpos->by_ref = false;
      return true;
    }

  if (TREE_CODE (op) == SSA_NAME)
    {
      /* Look through a single-rhs SSA copy; a default definition has no
	 defining statement to follow.  */
      if (SSA_NAME_IS_DEFAULT_DEF (op)
	  || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
	return false;
      stmt = SSA_NAME_DEF_STMT (op);
      op = gimple_assign_rhs1 (stmt);
      if (!REFERENCE_CLASS_P (op))
	return unmodified_parm_or_parm_agg_item (fbi, stmt, op, index_p, size_p,
						 aggpos);
    }

  /* OP is a memory reference: check whether it is a load from an
     aggregate passed in a parameter, by value or by reference.  */
  aggpos->agg_contents = true;
  return ipa_load_from_parm_agg (fbi, fbi->info->descriptors,
				 stmt, op, index_p, &aggpos->offset,
				 size_p, &aggpos->by_ref);
}
1295
1296 /* See if statement might disappear after inlining.
1297 0 - means not eliminated
1298 1 - half of statements goes away
1299 2 - for sure it is eliminated.
1300 We are not terribly sophisticated, basically looking for simple abstraction
1301 penalty wrappers. */
1302
static int
eliminated_by_inlining_prob (ipa_func_body_info *fbi, gimple *stmt)
{
  enum gimple_code code = gimple_code (stmt);
  enum tree_code rhs_code;

  /* Without optimization nothing is eliminated.  */
  if (!optimize)
    return 0;

  switch (code)
    {
    case GIMPLE_RETURN:
      /* Return statements disappear when the body is inlined.  */
      return 2;
    case GIMPLE_ASSIGN:
      if (gimple_num_ops (stmt) != 2)
	return 0;

      rhs_code = gimple_assign_rhs_code (stmt);

      /* Casts of parameters, loads from parameters passed by reference
	 and stores to return value or parameters are often free after
	 inlining due to SRA and further combining.
	 Assume that half of statements goes away.  */
      if (CONVERT_EXPR_CODE_P (rhs_code)
	  || rhs_code == VIEW_CONVERT_EXPR
	  || rhs_code == ADDR_EXPR
	  || gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS)
	{
	  tree rhs = gimple_assign_rhs1 (stmt);
	  tree lhs = gimple_assign_lhs (stmt);
	  tree inner_rhs = get_base_address (rhs);
	  tree inner_lhs = get_base_address (lhs);
	  bool rhs_free = false;
	  bool lhs_free = false;

	  /* get_base_address returns NULL for non-reference operands;
	     fall back to the operand itself.  */
	  if (!inner_rhs)
	    inner_rhs = rhs;
	  if (!inner_lhs)
	    inner_lhs = lhs;

	  /* Reads of parameter are expected to be free.  */
	  if (unmodified_parm (fbi, stmt, inner_rhs, NULL))
	    rhs_free = true;
	  /* Match expressions of form &this->field.  Those will most likely
	     combine with something upstream after inlining.  */
	  else if (TREE_CODE (inner_rhs) == ADDR_EXPR)
	    {
	      tree op = get_base_address (TREE_OPERAND (inner_rhs, 0));
	      if (TREE_CODE (op) == PARM_DECL)
		rhs_free = true;
	      else if (TREE_CODE (op) == MEM_REF
		       && unmodified_parm (fbi, stmt, TREE_OPERAND (op, 0),
					   NULL))
		rhs_free = true;
	    }

	  /* When parameter is not SSA register because its address is taken
	     and it is just copied into one, the statement will be completely
	     free after inlining (we will copy propagate backward).   */
	  if (rhs_free && is_gimple_reg (lhs))
	    return 2;

	  /* Reads of parameters passed by reference
	     expected to be free (i.e. optimized out after inlining).  */
	  if (TREE_CODE (inner_rhs) == MEM_REF
	      && unmodified_parm (fbi, stmt, TREE_OPERAND (inner_rhs, 0), NULL))
	    rhs_free = true;

	  /* Copying parameter passed by reference into gimple register is
	     probably also going to copy propagate, but we can't be quite
	     sure.  */
	  if (rhs_free && is_gimple_reg (lhs))
	    lhs_free = true;

	  /* Writes to parameters, parameters passed by value and return value
	     (either directly or passed via invisible reference) are free.  

	     TODO: We ought to handle testcase like
	     struct a {int a,b;};
	     struct a
	     returnstruct (void)
	     {
	     struct a a ={1,2};
	     return a;
	     }

	     This translate into:

	     returnstruct ()
	     {
	     int a$b;
	     int a$a;
	     struct a a;
	     struct a D.2739;

	     <bb 2>:
	     D.2739.a = 1;
	     D.2739.b = 2;
	     return D.2739;

	     }
	     For that we either need to copy ipa-split logic detecting writes
	     to return value.  */
	  if (TREE_CODE (inner_lhs) == PARM_DECL
	      || TREE_CODE (inner_lhs) == RESULT_DECL
	      || (TREE_CODE (inner_lhs) == MEM_REF
		  && (unmodified_parm (fbi, stmt, TREE_OPERAND (inner_lhs, 0),
				       NULL)
		      || (TREE_CODE (TREE_OPERAND (inner_lhs, 0)) == SSA_NAME
			  && SSA_NAME_VAR (TREE_OPERAND (inner_lhs, 0))
			  && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
						      (inner_lhs,
						       0))) == RESULT_DECL))))
	    lhs_free = true;
	  /* A free store of a register or invariant makes the read side
	     free as well.  */
	  if (lhs_free
	      && (is_gimple_reg (rhs) || is_gimple_min_invariant (rhs)))
	    rhs_free = true;
	  /* Both sides free: expect half of such statements to vanish.  */
	  if (lhs_free && rhs_free)
	    return 1;
	}
      return 0;
    default:
      return 0;
    }
}
1428
1429 /* Analyze EXPR if it represents a series of simple operations performed on
1430 a function parameter and return true if so. FBI, STMT, EXPR, INDEX_P and
1431 AGGPOS have the same meaning like in unmodified_parm_or_parm_agg_item.
1432 Type of the parameter or load from an aggregate via the parameter is
1433 stored in *TYPE_P. Operations on the parameter are recorded to
1434 PARAM_OPS_P if it is not NULL. */
1435
static bool
decompose_param_expr (struct ipa_func_body_info *fbi,
		      gimple *stmt, tree expr,
		      int *index_p, tree *type_p,
		      struct agg_position_info *aggpos,
		      expr_eval_ops *param_ops_p = NULL)
{
  /* Bound on how many operations we are willing to record, so that the
     resulting predicate stays cheap to evaluate.  */
  int op_limit = opt_for_fn (fbi->node->decl, param_ipa_max_param_expr_ops);
  int op_count = 0;

  if (param_ops_p)
    *param_ops_p = NULL;

  /* Walk backwards through the SSA definition chain of EXPR, peeling one
     operation per iteration, until we reach a parameter (success) or hit
     something we cannot handle.  */
  while (true)
    {
      expr_eval_op eval_op;
      unsigned rhs_count;
      unsigned cst_count = 0;

      if (unmodified_parm_or_parm_agg_item (fbi, stmt, expr, index_p, NULL,
					    aggpos))
	{
	  tree type = TREE_TYPE (expr);

	  if (aggpos->agg_contents)
	    {
	      /* Stop if containing bit-field.  */
	      if (TREE_CODE (expr) == BIT_FIELD_REF
		  || contains_bitfld_component_ref_p (expr))
		break;
	    }

	  *type_p = type;
	  return true;
	}

      if (TREE_CODE (expr) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (expr))
	break;

      if (!is_gimple_assign (stmt = SSA_NAME_DEF_STMT (expr)))
	break;

      switch (gimple_assign_rhs_class (stmt))
	{
	case GIMPLE_SINGLE_RHS:
	  /* Plain copy: look through it without recording an operation.  */
	  expr = gimple_assign_rhs1 (stmt);
	  continue;

	case GIMPLE_UNARY_RHS:
	  rhs_count = 1;
	  break;

	case GIMPLE_BINARY_RHS:
	  rhs_count = 2;
	  break;

	case GIMPLE_TERNARY_RHS:
	  rhs_count = 3;
	  break;

	default:
	  goto fail;
	}

      /* Stop if expression is too complex.  */
      if (op_count++ == op_limit)
	break;

      if (param_ops_p)
	{
	  eval_op.code = gimple_assign_rhs_code (stmt);
	  eval_op.type = TREE_TYPE (gimple_assign_lhs (stmt));
	  eval_op.val[0] = NULL_TREE;
	  eval_op.val[1] = NULL_TREE;
	}

      /* Classify each rhs operand: exactly one may be non-constant (it
	 becomes the new EXPR to trace); the rest are recorded as
	 invariant operands of the operation.  */
      expr = NULL_TREE;
      for (unsigned i = 0; i < rhs_count; i++)
	{
	  tree op = gimple_op (stmt, i + 1);

	  gcc_assert (op && !TYPE_P (op));
	  if (is_gimple_ip_invariant (op))
	    {
	      /* All operands constant means there is nothing to trace.  */
	      if (++cst_count == rhs_count)
		goto fail;

	      eval_op.val[cst_count - 1] = op;
	    }
	  else if (!expr)
	    {
	      /* Found a non-constant operand, and record its index in rhs
		 operands.  */
	      eval_op.index = i;
	      expr = op;
	    }
	  else
	    {
	      /* Found more than one non-constant operands.  */
	      goto fail;
	    }
	}

      /* Operations are recorded outermost-last, so insert at the front.  */
      if (param_ops_p)
	vec_safe_insert (*param_ops_p, 0, eval_op);
    }

  /* Failed to decompose, free resource and return.  */
fail:
  if (param_ops_p)
    vec_free (*param_ops_p);

  return false;
}
1550
1551 /* Record to SUMMARY that PARM is used by builtin_constant_p. */
1552
1553 static void
add_builtin_constant_p_parm(class ipa_fn_summary * summary,int parm)1554 add_builtin_constant_p_parm (class ipa_fn_summary *summary, int parm)
1555 {
1556 int ip;
1557
1558 /* Avoid duplicates. */
1559 for (unsigned int i = 0;
1560 summary->builtin_constant_p_parms.iterate (i, &ip); i++)
1561 if (ip == parm)
1562 return;
1563 summary->builtin_constant_p_parms.safe_push (parm);
1564 }
1565
1566 /* If BB ends by a conditional we can turn into predicates, attach corresponding
1567 predicates to the CFG edges. */
1568
static void
set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi,
				   class ipa_fn_summary *summary,
				   class ipa_node_params *params_summary,
				   basic_block bb)
{
  gimple *last;
  tree op, op2;
  int index;
  struct agg_position_info aggpos;
  enum tree_code code, inverted_code;
  edge e;
  edge_iterator ei;
  gimple *set_stmt;
  tree param_type;
  expr_eval_ops param_ops;

  /* Only conditionals comparing something against an interprocedural
     invariant can be turned into predicates.  */
  last = last_stmt (bb);
  if (!last || gimple_code (last) != GIMPLE_COND)
    return;
  if (!is_gimple_ip_invariant (gimple_cond_rhs (last)))
    return;
  op = gimple_cond_lhs (last);

  if (decompose_param_expr (fbi, last, op, &index, &param_type, &aggpos,
			    &param_ops))
    {
      code = gimple_cond_code (last);
      inverted_code = invert_tree_comparison (code, HONOR_NANS (op));

      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  enum tree_code this_code = (e->flags & EDGE_TRUE_VALUE
				      ? code : inverted_code);
	  /* invert_tree_comparison will return ERROR_MARK on FP
	     comparisons that are not EQ/NE instead of returning proper
	     unordered one.  Be sure it is not confused with NON_CONSTANT.

	     And if the edge's target is the final block of diamond CFG graph
	     of this conditional statement, we do not need to compute
	     predicate for the edge because the final block's predicate must
	     be at least as that of the first block of the statement.  */
	  if (this_code != ERROR_MARK
	      && !dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
	    {
	      ipa_predicate p
		= add_condition (summary, params_summary, index,
				 param_type, &aggpos,
				 this_code, gimple_cond_rhs (last), param_ops);
	      e->aux = edge_predicate_pool.allocate ();
	      *(ipa_predicate *) e->aux = p;
	    }
	}
      vec_free (param_ops);
    }

  if (TREE_CODE (op) != SSA_NAME)
    return;
  /* Special case
     if (builtin_constant_p (op))
     constant_code
     else
     nonconstant_code.
     Here we can predicate nonconstant_code.  We can't
     really handle constant_code since we have no predicate
     for this and also the constant code is not known to be
     optimized away when inliner doesn't see operand is constant.
     Other optimizers might think otherwise.  */
  if (gimple_cond_code (last) != NE_EXPR
      || !integer_zerop (gimple_cond_rhs (last)))
    return;
  set_stmt = SSA_NAME_DEF_STMT (op);
  if (!gimple_call_builtin_p (set_stmt, BUILT_IN_CONSTANT_P)
      || gimple_call_num_args (set_stmt) != 1)
    return;
  op2 = gimple_call_arg (set_stmt, 0);
  if (!decompose_param_expr (fbi, set_stmt, op2, &index, &param_type, &aggpos))
    return;
  /* Remember the parameter so clones can re-evaluate the builtin;
     by-reference contents are not tracked this way.  */
  if (!aggpos.by_ref)
    add_builtin_constant_p_parm (summary, index);
  /* Attach the is_not_constant predicate to the false (nonconstant)
     edge(s).  */
  FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE)
    {
      ipa_predicate p = add_condition (summary, params_summary, index,
				       param_type, &aggpos,
				       ipa_predicate::is_not_constant, NULL_TREE);
      e->aux = edge_predicate_pool.allocate ();
      *(ipa_predicate *) e->aux = p;
    }
}
1658
1659
1660 /* If BB ends by a switch we can turn into predicates, attach corresponding
1661 predicates to the CFG edges. */
1662
static void
set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
				     class ipa_fn_summary *summary,
				     class ipa_node_params *params_summary,
				     basic_block bb)
{
  gimple *lastg;
  tree op;
  int index;
  struct agg_position_info aggpos;
  edge e;
  edge_iterator ei;
  size_t n;
  size_t case_idx;
  tree param_type;
  expr_eval_ops param_ops;

  lastg = last_stmt (bb);
  if (!lastg || gimple_code (lastg) != GIMPLE_SWITCH)
    return;
  gswitch *last = as_a <gswitch *> (lastg);
  op = gimple_switch_index (last);
  /* Only switches whose index is an expression over a parameter can be
     turned into predicates.  */
  if (!decompose_param_expr (fbi, last, op, &index, &param_type, &aggpos,
			     &param_ops))
    return;

  /* Merged case ranges, used to build the default case's predicate.  */
  auto_vec<std::pair<tree, tree> > ranges;
  tree type = TREE_TYPE (op);
  /* Bound on the number of range endpoints, so the default-case predicate
     does not get too complicated.  */
  int bound_limit = opt_for_fn (fbi->node->decl,
				param_ipa_max_switch_predicate_bounds);
  int bound_count = 0;
  value_range vr;

  get_range_query (cfun)->range_of_expr (vr, op);
  if (vr.undefined_p ())
    vr.set_varying (TREE_TYPE (op));
  value_range_kind vr_type = vr.kind ();
  wide_int vr_wmin = wi::to_wide (vr.min ());
  wide_int vr_wmax = wi::to_wide (vr.max ());

  /* Start every outgoing edge with the "never executed" predicate; case
     predicates are OR-ed in below.  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      e->aux = edge_predicate_pool.allocate ();
      *(ipa_predicate *) e->aux = false;
    }

  e = gimple_switch_edge (cfun, last, 0);
  /* Set BOUND_COUNT to maximum count to bypass computing predicate for
     default case if its target basic block is in convergence point of all
     switch cases, which can be determined by checking whether it
     post-dominates the switch statement.  */
  if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
    bound_count = INT_MAX;

  n = gimple_switch_num_labels (last);
  for (case_idx = 1; case_idx < n; ++case_idx)
    {
      tree cl = gimple_switch_label (last, case_idx);
      tree min = CASE_LOW (cl);
      tree max = CASE_HIGH (cl);
      ipa_predicate p;

      e = gimple_switch_edge (cfun, last, case_idx);

      /* The case value might not have same type as switch expression,
	 extend the value based on the expression type.  */
      if (TREE_TYPE (min) != type)
	min = wide_int_to_tree (type, wi::to_wide (min));

      if (!max)
	max = min;
      else if (TREE_TYPE (max) != type)
	max = wide_int_to_tree (type, wi::to_wide (max));

      /* The case's target basic block is in convergence point of all switch
	 cases, its predicate should be at least as that of the switch
	 statement.  */
      if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
	p = true;
      else if (min == max)
	p = add_condition (summary, params_summary, index, param_type,
			   &aggpos, EQ_EXPR, min, param_ops);
      else
	{
	  ipa_predicate p1, p2;
	  p1 = add_condition (summary, params_summary, index, param_type,
			      &aggpos, GE_EXPR, min, param_ops);
	  p2 = add_condition (summary, params_summary,index, param_type,
			      &aggpos, LE_EXPR, max, param_ops);
	  p = p1 & p2;
	}
      *(ipa_predicate *) e->aux
	= p.or_with (summary->conds, *(ipa_predicate *) e->aux);

      /* If there are too many disjoint case ranges, predicate for default
	 case might become too complicated.  So add a limit here.  */
      if (bound_count > bound_limit)
	continue;

      bool new_range = true;

      if (!ranges.is_empty ())
	{
	  wide_int curr_wmin = wi::to_wide (min);
	  wide_int last_wmax = wi::to_wide (ranges.last ().second);

	  /* Merge case ranges if they are continuous.  */
	  if (curr_wmin == last_wmax + 1)
	    new_range = false;
	  else if (vr_type == VR_ANTI_RANGE)
	    {
	      /* If two disjoint case ranges can be connected by anti-range
		 of switch index, combine them to one range.  */
	      if (wi::lt_p (vr_wmax, curr_wmin - 1, TYPE_SIGN (type)))
		vr_type = VR_UNDEFINED;
	      else if (wi::le_p (vr_wmin, last_wmax + 1, TYPE_SIGN (type)))
		new_range = false;
	    }
	}

      /* Create/extend a case range.  And we count endpoints of range set,
	 this number nearly equals to number of conditions that we will create
	 for predicate of default case.  */
      if (new_range)
	{
	  bound_count += (min == max) ? 1 : 2;
	  ranges.safe_push (std::make_pair (min, max));
	}
      else
	{
	  bound_count += (ranges.last ().first == ranges.last ().second);
	  ranges.last ().second = max;
	}
    }

  e = gimple_switch_edge (cfun, last, 0);
  if (bound_count > bound_limit)
    {
      /* Give up on a precise default predicate; assume the default case
	 is always reachable.  */
      *(ipa_predicate *) e->aux = true;
      vec_free (param_ops);
      return;
    }

  ipa_predicate p_seg = true;
  ipa_predicate p_all = false;

  if (vr_type != VR_RANGE)
    {
      vr_wmin = wi::to_wide (TYPE_MIN_VALUE (type));
      vr_wmax = wi::to_wide (TYPE_MAX_VALUE (type));
    }

  /* Construct predicate to represent default range set that is negation of
     all case ranges.  Case range is classified as containing single/non-single
     values.  Suppose a piece of case ranges in the following.

                [D1...D2]  [S1] ... [Sn]  [D3...D4]

     To represent default case's range sets between two non-single value
     case ranges (From D2 to D3), we construct predicate as:

              D2 < x < D3 && x != S1 && ... && x != Sn
   */
  for (size_t i = 0; i < ranges.length (); i++)
    {
      tree min = ranges[i].first;
      tree max = ranges[i].second;

      if (min == max)
	p_seg &= add_condition (summary, params_summary, index,
				param_type, &aggpos, NE_EXPR,
				min, param_ops);
      else
	{
	  /* Do not create sub-predicate for range that is beyond low bound
	     of switch index.  */
	  if (wi::lt_p (vr_wmin, wi::to_wide (min), TYPE_SIGN (type)))
	    {
	      p_seg &= add_condition (summary, params_summary, index,
				      param_type, &aggpos,
				      LT_EXPR, min, param_ops);
	      p_all = p_all.or_with (summary->conds, p_seg);
	    }

	  /* Do not create sub-predicate for range that is beyond up bound
	     of switch index.  */
	  if (wi::le_p (vr_wmax, wi::to_wide (max), TYPE_SIGN (type)))
	    {
	      p_seg = false;
	      break;
	    }

	  p_seg = add_condition (summary, params_summary, index,
				 param_type, &aggpos, GT_EXPR,
				 max, param_ops);
	}
    }

  p_all = p_all.or_with (summary->conds, p_seg);
  *(ipa_predicate *) e->aux
    = p_all.or_with (summary->conds, *(ipa_predicate *) e->aux);

  vec_free (param_ops);
}
1867
1868
1869 /* For each BB in NODE attach to its AUX pointer predicate under
1870 which it is executable. */
1871
static void
compute_bb_predicates (struct ipa_func_body_info *fbi,
		       struct cgraph_node *node,
		       class ipa_fn_summary *summary,
		       class ipa_node_params *params_summary)
{
  struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
  bool done = false;
  basic_block bb;

  /* Seed the propagation: attach predicates to CFG edges leaving
     conditionals and switches we can analyze.  */
  FOR_EACH_BB_FN (bb, my_function)
    {
      set_cond_stmt_execution_predicate (fbi, summary, params_summary, bb);
      set_switch_stmt_execution_predicate (fbi, summary, params_summary, bb);
    }

  /* Entry block is always executable.  */
  ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux
    = edge_predicate_pool.allocate ();
  *(ipa_predicate *) ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux = true;

  /* A simple dataflow propagation of predicates forward in the CFG.
     TODO: work in reverse postorder.  */
  while (!done)
    {
      done = true;
      FOR_EACH_BB_FN (bb, my_function)
	{
	  /* A block executes if any predecessor executes and the edge
	     from it is taken; OR over all incoming edges.  */
	  ipa_predicate p = false;
	  edge e;
	  edge_iterator ei;
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    {
	      if (e->src->aux)
		{
		  ipa_predicate this_bb_predicate
		    = *(ipa_predicate *) e->src->aux;
		  if (e->aux)
		    this_bb_predicate &= (*(ipa_predicate *) e->aux);
		  p = p.or_with (summary->conds, this_bb_predicate);
		  /* Nothing can grow beyond "always executes".  */
		  if (p == true)
		    break;
		}
	    }
	  if (p != false)
	    {
	      basic_block pdom_bb;

	      if (!bb->aux)
		{
		  done = false;
		  bb->aux = edge_predicate_pool.allocate ();
		  *((ipa_predicate *) bb->aux) = p;
		}
	      else if (p != *(ipa_predicate *) bb->aux)
		{
		  /* This OR operation is needed to ensure monotonous data flow
		     in the case we hit the limit on number of clauses and the
		     and/or operations above give approximate answers.  */
		  p = p.or_with (summary->conds, *(ipa_predicate *)bb->aux);
		  if (p != *(ipa_predicate *)bb->aux)
		    {
		      done = false;
		      *((ipa_predicate *)bb->aux) = p;
		    }
		}

	      /* For switch/if statement, we can OR-combine predicates of all
		 its cases/branches to get predicate for basic block in their
		 convergence point, but sometimes this will generate very
		 complicated predicate.  Actually, we can get simplified
		 predicate in another way by using the fact that predicate
		 for a basic block must also hold true for its post dominators.
		 To be specific, basic block in convergence point of
		 conditional statement should include predicate of the
		 statement.  */
	      pdom_bb = get_immediate_dominator (CDI_POST_DOMINATORS, bb);
	      if (pdom_bb == EXIT_BLOCK_PTR_FOR_FN (my_function) || !pdom_bb)
		;
	      else if (!pdom_bb->aux)
		{
		  done = false;
		  pdom_bb->aux = edge_predicate_pool.allocate ();
		  *((ipa_predicate *)pdom_bb->aux) = p;
		}
	      else if (p != *(ipa_predicate *)pdom_bb->aux)
		{
		  p = p.or_with (summary->conds,
				 *(ipa_predicate *)pdom_bb->aux);
		  if (p != *(ipa_predicate *)pdom_bb->aux)
		    {
		      done = false;
		      *((ipa_predicate *)pdom_bb->aux) = p;
		    }
		}
	    }
	}
    }
}
1971
1972
1973 /* Return predicate specifying when the STMT might have result that is not
1974 a compile time constant. */
1975
static ipa_predicate
will_be_nonconstant_expr_predicate (ipa_func_body_info *fbi,
				    class ipa_fn_summary *summary,
				    class ipa_node_params *params_summary,
				    tree expr,
				    vec<ipa_predicate> nonconstant_names)
{
  tree parm;
  int index;

  /* Unary operations (casts etc.) do not change constancy; look through
     them.  */
  while (UNARY_CLASS_P (expr))
    expr = TREE_OPERAND (expr, 0);

  /* A direct read of a parameter is nonconstant exactly when that
     parameter changed.  */
  parm = unmodified_parm (fbi, NULL, expr, NULL);
  if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
    return add_condition (summary, params_summary, index, TREE_TYPE (parm), NULL,
			  ipa_predicate::changed, NULL_TREE);
  if (is_gimple_min_invariant (expr))
    return false;
  /* SSA names have their predicates precomputed by the caller.  */
  if (TREE_CODE (expr) == SSA_NAME)
    return nonconstant_names[SSA_NAME_VERSION (expr)];
  if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))
    {
      /* The result is nonconstant when either operand is; short-circuit
	 once we know the answer is "always".  */
      ipa_predicate p1
	= will_be_nonconstant_expr_predicate (fbi, summary,
					      params_summary,
					      TREE_OPERAND (expr, 0),
					      nonconstant_names);
      if (p1 == true)
	return p1;

      ipa_predicate p2
	= will_be_nonconstant_expr_predicate (fbi, summary,
					      params_summary,
					      TREE_OPERAND (expr, 1),
					      nonconstant_names);
      return p1.or_with (summary->conds, p2);
    }
  else if (TREE_CODE (expr) == COND_EXPR)
    {
      /* OR the predicates of all three operands, bailing out early
	 whenever one of them is "always nonconstant".  */
      ipa_predicate p1
	= will_be_nonconstant_expr_predicate (fbi, summary,
					      params_summary,
					      TREE_OPERAND (expr, 0),
					      nonconstant_names);
      if (p1 == true)
	return p1;

      ipa_predicate p2
	= will_be_nonconstant_expr_predicate (fbi, summary,
					      params_summary,
					      TREE_OPERAND (expr, 1),
					      nonconstant_names);
      if (p2 == true)
	return p2;
      p1 = p1.or_with (summary->conds, p2);
      p2 = will_be_nonconstant_expr_predicate (fbi, summary,
					       params_summary,
					       TREE_OPERAND (expr, 2),
					       nonconstant_names);
      return p2.or_with (summary->conds, p1);
    }
  else if (TREE_CODE (expr) == CALL_EXPR)
    /* Calls are conservatively assumed to be always nonconstant.  */
    return true;
  else
    {
      debug_tree (expr);
      gcc_unreachable ();
    }
}
2046
2047
/* Return predicate specifying when the STMT might have result that is not
   a compile time constant.

   FBI describes the function body being analyzed, SUMMARY/PARAMS_SUMMARY
   receive the conditions built, and NONCONSTANT_NAMES maps SSA name
   versions to the predicates already computed for them.  The returned
   predicate is also stored for the statement's SSA LHS, if any.  */

static ipa_predicate
will_be_nonconstant_predicate (struct ipa_func_body_info *fbi,
			       class ipa_fn_summary *summary,
			       class ipa_node_params *params_summary,
			       gimple *stmt,
			       vec<ipa_predicate> nonconstant_names)
{
  /* P == true is the conservative "always nonconstant" answer used for
     every early bail-out below.  */
  ipa_predicate p = true;
  ssa_op_iter iter;
  tree use;
  tree param_type = NULL_TREE;
  ipa_predicate op_non_const;
  bool is_load;
  int base_index;
  struct agg_position_info aggpos;

  /* What statements might be optimized away
     when their arguments are constant.  */
  if (gimple_code (stmt) != GIMPLE_ASSIGN
      && gimple_code (stmt) != GIMPLE_COND
      && gimple_code (stmt) != GIMPLE_SWITCH
      && (gimple_code (stmt) != GIMPLE_CALL
	  || !(gimple_call_flags (stmt) & ECF_CONST)))
    return p;

  /* Stores will stay anyway.  */
  if (gimple_store_p (stmt))
    return p;

  is_load = gimple_assign_load_p (stmt);

  /* Loads can be optimized when the value is known.  */
  if (is_load)
    {
      tree op = gimple_assign_rhs1 (stmt);
      /* BASE_INDEX/PARAM_TYPE/AGGPOS describe the parameter (or its
	 aggregate slot) the load reads from; bail out if the address is
	 not a function of a parameter.  */
      if (!decompose_param_expr (fbi, stmt, op, &base_index, &param_type,
				 &aggpos))
	return p;
    }
  else
    base_index = -1;

  /* See if we understand all operands before we start
     adding conditionals.  */
  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
    {
      tree parm = unmodified_parm (fbi, stmt, use, NULL);
      /* For arguments we can build a condition.  */
      if (parm && ipa_get_param_decl_index (fbi->info, parm) >= 0)
	continue;
      if (TREE_CODE (use) != SSA_NAME)
	return p;
      /* If we know when operand is constant,
	 we still can say something useful.  */
      if (nonconstant_names[SSA_NAME_VERSION (use)] != true)
	continue;
      return p;
    }

  /* Second pass: accumulate into OP_NON_CONST a predicate that is true
     whenever at least one input may be nonconstant.  For loads, start
     with the condition on the loaded parameter slot itself.  */
  if (is_load)
    op_non_const =
      add_condition (summary, params_summary,
		     base_index, param_type, &aggpos,
		     ipa_predicate::changed, NULL_TREE);
  else
    op_non_const = false;
  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
    {
      tree parm = unmodified_parm (fbi, stmt, use, NULL);
      int index;

      if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
	{
	  if (index != base_index)
	    p = add_condition (summary, params_summary, index,
			       TREE_TYPE (parm), NULL,
			       ipa_predicate::changed, NULL_TREE);
	  else
	    /* Already accounted for by the load condition above.  */
	    continue;
	}
      else
	p = nonconstant_names[SSA_NAME_VERSION (use)];
      op_non_const = p.or_with (summary->conds, op_non_const);
    }
  /* Record the result for the defined SSA name so later statements can
     reuse it.  */
  if ((gimple_code (stmt) == GIMPLE_ASSIGN || gimple_code (stmt) == GIMPLE_CALL)
      && gimple_op (stmt, 0)
      && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
    nonconstant_names[SSA_NAME_VERSION (gimple_op (stmt, 0))]
      = op_non_const;
  return op_non_const;
}
2142
/* State passed through walk_aliased_vdefs to the record_modified
   callback.  */
struct record_modified_bb_info
{
  /* The analyzed value; used only for dump output.  */
  tree op;
  /* Set of indices of basic blocks where OP may be modified.  */
  bitmap bb_set;
  /* The statement whose own definition should be ignored.  */
  gimple *stmt;
};
2149
2150 /* Value is initialized in INIT_BB and used in USE_BB. We want to compute
2151 probability how often it changes between USE_BB.
2152 INIT_BB->count/USE_BB->count is an estimate, but if INIT_BB
2153 is in different loop nest, we can do better.
2154 This is all just estimate. In theory we look for minimal cut separating
2155 INIT_BB and USE_BB, but we only want to anticipate loop invariant motion
2156 anyway. */
2157
2158 static basic_block
get_minimal_bb(basic_block init_bb,basic_block use_bb)2159 get_minimal_bb (basic_block init_bb, basic_block use_bb)
2160 {
2161 class loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father);
2162 if (l && l->header->count < init_bb->count)
2163 return l->header;
2164 return init_bb;
2165 }
2166
2167 /* Callback of walk_aliased_vdefs. Records basic blocks where the value may be
2168 set except for info->stmt. */
2169
2170 static bool
record_modified(ao_ref * ao ATTRIBUTE_UNUSED,tree vdef,void * data)2171 record_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef, void *data)
2172 {
2173 struct record_modified_bb_info *info =
2174 (struct record_modified_bb_info *) data;
2175 if (SSA_NAME_DEF_STMT (vdef) == info->stmt)
2176 return false;
2177 if (gimple_clobber_p (SSA_NAME_DEF_STMT (vdef)))
2178 return false;
2179 bitmap_set_bit (info->bb_set,
2180 SSA_NAME_IS_DEFAULT_DEF (vdef)
2181 ? ENTRY_BLOCK_PTR_FOR_FN (cfun)->index
2182 : get_minimal_bb
2183 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
2184 gimple_bb (info->stmt))->index);
2185 if (dump_file)
2186 {
2187 fprintf (dump_file, " Param ");
2188 print_generic_expr (dump_file, info->op, TDF_SLIM);
2189 fprintf (dump_file, " changed at bb %i, minimal: %i stmt: ",
2190 gimple_bb (SSA_NAME_DEF_STMT (vdef))->index,
2191 get_minimal_bb
2192 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
2193 gimple_bb (info->stmt))->index);
2194 print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (vdef), 0);
2195 }
2196 return false;
2197 }
2198
/* Return probability (based on REG_BR_PROB_BASE) that I-th parameter of STMT
   will change since last invocation of STMT.

   Value 0 is reserved for compile time invariants.
   For common parameters it is REG_BR_PROB_BASE.  For loop invariants it
   ought to be REG_BR_PROB_BASE / estimated_iters.  */

static int
param_change_prob (ipa_func_body_info *fbi, gimple *stmt, int i)
{
  tree op = gimple_call_arg (stmt, i);
  basic_block bb = gimple_bb (stmt);

  /* Look through the size annotation of a variable-sized argument.  */
  if (TREE_CODE (op) == WITH_SIZE_EXPR)
    op = TREE_OPERAND (op, 0);

  tree base = get_base_address (op);

  /* Global invariants never change.  */
  if (is_gimple_min_invariant (base))
    return 0;

  /* We would have to do non-trivial analysis to really work out what
     is the probability of value to change (i.e. when init statement
     is in a sibling loop of the call).

     We do an conservative estimate: when call is executed N times more often
     than the statement defining value, we take the frequency 1/N.  */
  if (TREE_CODE (base) == SSA_NAME)
    {
      profile_count init_count;

      /* No usable profile for the call site; assume always changing.  */
      if (!bb->count.nonzero_p ())
	return REG_BR_PROB_BASE;

      if (SSA_NAME_IS_DEFAULT_DEF (base))
	init_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
      else
	init_count = get_minimal_bb
		 (gimple_bb (SSA_NAME_DEF_STMT (base)),
		  gimple_bb (stmt))->count;

      /* Scale to REG_BR_PROB_BASE and clamp to at least 1 so the value
	 stays distinct from the "invariant" answer 0.  */
      if (init_count < bb->count)
	return MAX ((init_count.to_sreal_scale (bb->count)
		     * REG_BR_PROB_BASE).to_int (), 1);
      return REG_BR_PROB_BASE;
    }
  else
    {
      /* BASE is a memory object; walk virtual definitions to find where
	 it may be modified.  */
      ao_ref refd;
      profile_count max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
      struct record_modified_bb_info info;
      tree init = ctor_for_folding (base);

      /* A readonly object with a known constructor never changes.  */
      if (init != error_mark_node)
	return 0;
      if (!bb->count.nonzero_p () || fbi->aa_walk_budget == 0)
	return REG_BR_PROB_BASE;
      if (dump_file)
	{
	  fprintf (dump_file, " Analyzing param change probability of ");
	  print_generic_expr (dump_file, op, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}
      ao_ref_init (&refd, op);
      info.op = op;
      info.stmt = stmt;
      info.bb_set = BITMAP_ALLOC (NULL);
      /* record_modified fills INFO.BB_SET with blocks where the value
	 may be set; the walk is capped by the remaining AA budget.  */
      int walked
	= walk_aliased_vdefs (&refd, gimple_vuse (stmt), record_modified, &info,
			      NULL, NULL, fbi->aa_walk_budget);
      if (walked > 0)
	fbi->aa_walk_budget -= walked;
      /* Give up if the walk was aborted or the value may be set in the
	 very block of the call.  */
      if (walked < 0 || bitmap_bit_p (info.bb_set, bb->index))
	{
	  if (walked < 0)
	    fbi->aa_walk_budget = 0;
	  if (dump_file)
	    {
	      if (walked < 0)
		fprintf (dump_file, " Ran out of AA walking budget.\n");
	      else
		fprintf (dump_file, " Set in same BB as used.\n");
	    }
	  BITMAP_FREE (info.bb_set);
	  return REG_BR_PROB_BASE;
	}

      bitmap_iterator bi;
      unsigned index;
      /* Lookup the most frequent update of the value and believe that
	 it dominates all the other; precise analysis here is difficult.  */
      EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi)
	max = max.max (BASIC_BLOCK_FOR_FN (cfun, index)->count);
      if (dump_file)
	{
	  fprintf (dump_file, " Set with count ");
	  max.dump (dump_file);
	  fprintf (dump_file, " and used with count ");
	  bb->count.dump (dump_file);
	  fprintf (dump_file, " freq %f\n",
		   max.to_sreal_scale (bb->count).to_double ());
	}

      BITMAP_FREE (info.bb_set);
      /* As above: scale and clamp away from the reserved value 0.  */
      if (max < bb->count)
	return MAX ((max.to_sreal_scale (bb->count)
		     * REG_BR_PROB_BASE).to_int (), 1);
      return REG_BR_PROB_BASE;
    }
}
2310
2311 /* Find whether a basic block BB is the final block of a (half) diamond CFG
2312 sub-graph and if the predicate the condition depends on is known. If so,
2313 return true and store the pointer the predicate in *P. */
2314
2315 static bool
phi_result_unknown_predicate(ipa_func_body_info * fbi,ipa_fn_summary * summary,class ipa_node_params * params_summary,basic_block bb,ipa_predicate * p,vec<ipa_predicate> nonconstant_names)2316 phi_result_unknown_predicate (ipa_func_body_info *fbi,
2317 ipa_fn_summary *summary,
2318 class ipa_node_params *params_summary,
2319 basic_block bb,
2320 ipa_predicate *p,
2321 vec<ipa_predicate> nonconstant_names)
2322 {
2323 edge e;
2324 edge_iterator ei;
2325 basic_block first_bb = NULL;
2326 gimple *stmt;
2327
2328 if (single_pred_p (bb))
2329 {
2330 *p = false;
2331 return true;
2332 }
2333
2334 FOR_EACH_EDGE (e, ei, bb->preds)
2335 {
2336 if (single_succ_p (e->src))
2337 {
2338 if (!single_pred_p (e->src))
2339 return false;
2340 if (!first_bb)
2341 first_bb = single_pred (e->src);
2342 else if (single_pred (e->src) != first_bb)
2343 return false;
2344 }
2345 else
2346 {
2347 if (!first_bb)
2348 first_bb = e->src;
2349 else if (e->src != first_bb)
2350 return false;
2351 }
2352 }
2353
2354 if (!first_bb)
2355 return false;
2356
2357 stmt = last_stmt (first_bb);
2358 if (!stmt
2359 || gimple_code (stmt) != GIMPLE_COND
2360 || !is_gimple_ip_invariant (gimple_cond_rhs (stmt)))
2361 return false;
2362
2363 *p = will_be_nonconstant_expr_predicate (fbi, summary, params_summary,
2364 gimple_cond_lhs (stmt),
2365 nonconstant_names);
2366 if (*p == true)
2367 return false;
2368 else
2369 return true;
2370 }
2371
2372 /* Given a PHI statement in a function described by inline properties SUMMARY
2373 and *P being the predicate describing whether the selected PHI argument is
2374 known, store a predicate for the result of the PHI statement into
2375 NONCONSTANT_NAMES, if possible. */
2376
2377 static void
predicate_for_phi_result(class ipa_fn_summary * summary,gphi * phi,ipa_predicate * p,vec<ipa_predicate> nonconstant_names)2378 predicate_for_phi_result (class ipa_fn_summary *summary, gphi *phi,
2379 ipa_predicate *p,
2380 vec<ipa_predicate> nonconstant_names)
2381 {
2382 unsigned i;
2383
2384 for (i = 0; i < gimple_phi_num_args (phi); i++)
2385 {
2386 tree arg = gimple_phi_arg (phi, i)->def;
2387 if (!is_gimple_min_invariant (arg))
2388 {
2389 gcc_assert (TREE_CODE (arg) == SSA_NAME);
2390 *p = p->or_with (summary->conds,
2391 nonconstant_names[SSA_NAME_VERSION (arg)]);
2392 if (*p == true)
2393 return;
2394 }
2395 }
2396
2397 if (dump_file && (dump_flags & TDF_DETAILS))
2398 {
2399 fprintf (dump_file, "\t\tphi predicate: ");
2400 p->dump (dump_file, summary->conds);
2401 }
2402 nonconstant_names[SSA_NAME_VERSION (gimple_phi_result (phi))] = *p;
2403 }
2404
2405 /* For a typical usage of __builtin_expect (a<b, 1), we
2406 may introduce an extra relation stmt:
2407 With the builtin, we have
2408 t1 = a <= b;
2409 t2 = (long int) t1;
2410 t3 = __builtin_expect (t2, 1);
2411 if (t3 != 0)
2412 goto ...
2413 Without the builtin, we have
2414 if (a<=b)
2415 goto...
2416 This affects the size/time estimation and may have
2417 an impact on the earlier inlining.
2418 Here find this pattern and fix it up later. */
2419
2420 static gimple *
find_foldable_builtin_expect(basic_block bb)2421 find_foldable_builtin_expect (basic_block bb)
2422 {
2423 gimple_stmt_iterator bsi;
2424
2425 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2426 {
2427 gimple *stmt = gsi_stmt (bsi);
2428 if (gimple_call_builtin_p (stmt, BUILT_IN_EXPECT)
2429 || gimple_call_builtin_p (stmt, BUILT_IN_EXPECT_WITH_PROBABILITY)
2430 || gimple_call_internal_p (stmt, IFN_BUILTIN_EXPECT))
2431 {
2432 tree var = gimple_call_lhs (stmt);
2433 tree arg = gimple_call_arg (stmt, 0);
2434 use_operand_p use_p;
2435 gimple *use_stmt;
2436 bool match = false;
2437 bool done = false;
2438
2439 if (!var || !arg)
2440 continue;
2441 gcc_assert (TREE_CODE (var) == SSA_NAME);
2442
2443 while (TREE_CODE (arg) == SSA_NAME)
2444 {
2445 gimple *stmt_tmp = SSA_NAME_DEF_STMT (arg);
2446 if (!is_gimple_assign (stmt_tmp))
2447 break;
2448 switch (gimple_assign_rhs_code (stmt_tmp))
2449 {
2450 case LT_EXPR:
2451 case LE_EXPR:
2452 case GT_EXPR:
2453 case GE_EXPR:
2454 case EQ_EXPR:
2455 case NE_EXPR:
2456 match = true;
2457 done = true;
2458 break;
2459 CASE_CONVERT:
2460 break;
2461 default:
2462 done = true;
2463 break;
2464 }
2465 if (done)
2466 break;
2467 arg = gimple_assign_rhs1 (stmt_tmp);
2468 }
2469
2470 if (match && single_imm_use (var, &use_p, &use_stmt)
2471 && gimple_code (use_stmt) == GIMPLE_COND)
2472 return use_stmt;
2473 }
2474 }
2475 return NULL;
2476 }
2477
/* Return true when the basic blocks contains only clobbers followed by RESX.
   Such BBs are kept around to make removal of dead stores possible with
   presence of EH and will be optimized out by optimize_clobbers later in the
   game.

   NEED_EH is used to recurse in case the clobber has non-EH predecessors
   that can be clobber only, too..  When it is false, the RESX is not necessary
   on the end of basic block.  */

static bool
clobber_only_eh_bb_p (basic_block bb, bool need_eh = true)
{
  gimple_stmt_iterator gsi = gsi_last_bb (bb);
  edge_iterator ei;
  edge e;

  if (need_eh)
    {
      /* Require the block to end with a RESX and inspect what precedes
	 it.  */
      if (gsi_end_p (gsi))
	return false;
      if (gimple_code (gsi_stmt (gsi)) != GIMPLE_RESX)
	return false;
      gsi_prev (&gsi);
    }
  else if (!single_succ_p (bb))
    return false;

  /* Walk backwards; only debug statements and clobbers may occur before
     the leading labels.  */
  for (; !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      if (is_gimple_debug (stmt))
	continue;
      if (gimple_clobber_p (stmt))
	continue;
      if (gimple_code (stmt) == GIMPLE_LABEL)
	break;
      return false;
    }

  /* See if all predecessors are either throws or clobber only BBs.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    if (!(e->flags & EDGE_EH)
	&& !clobber_only_eh_bb_p (e->src, false))
      return false;

  return true;
}
2525
2526 /* Return true if STMT compute a floating point expression that may be affected
2527 by -ffast-math and similar flags. */
2528
2529 static bool
fp_expression_p(gimple * stmt)2530 fp_expression_p (gimple *stmt)
2531 {
2532 ssa_op_iter i;
2533 tree op;
2534
2535 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF|SSA_OP_USE)
2536 if (FLOAT_TYPE_P (TREE_TYPE (op)))
2537 return true;
2538 return false;
2539 }
2540
2541 /* Return true if T references memory location that is local
2542 for the function (that means, dead after return) or read-only. */
2543
2544 bool
refs_local_or_readonly_memory_p(tree t)2545 refs_local_or_readonly_memory_p (tree t)
2546 {
2547 /* Non-escaping memory is fine. */
2548 t = get_base_address (t);
2549 if ((TREE_CODE (t) == MEM_REF
2550 || TREE_CODE (t) == TARGET_MEM_REF))
2551 return points_to_local_or_readonly_memory_p (TREE_OPERAND (t, 0));
2552
2553 /* Automatic variables are fine. */
2554 if (DECL_P (t)
2555 && auto_var_in_fn_p (t, current_function_decl))
2556 return true;
2557
2558 /* Read-only variables are fine. */
2559 if (DECL_P (t) && TREE_READONLY (t))
2560 return true;
2561
2562 return false;
2563 }
2564
/* Return true if T is a pointer pointing to memory location that is local
   for the function (that means, dead after return) or read-only.  */

bool
points_to_local_or_readonly_memory_p (tree t)
{
  /* See if memory location is clearly invalid.  */
  if (integer_zerop (t))
    return flag_delete_null_pointer_checks;
  if (TREE_CODE (t) == SSA_NAME)
    {
      /* For IPA passes we can consider accesses to return slot local
	 even if it is not local in the sense that memory is dead by
	 the end of function.
	 The outer function will see a store in the call assignment
	 and thus this will do right thing for all uses of this
	 function in the current IPA passes (modref, pure/const discovery
	 and inlining heuristics).  */
      if (DECL_RESULT (current_function_decl)
	  && DECL_BY_REFERENCE (DECL_RESULT (current_function_decl))
	  && t == ssa_default_def (cfun, DECL_RESULT (current_function_decl)))
	return true;
      /* Otherwise rely on points-to analysis: local when the pointer
	 cannot dereference to global (or escaped) memory.  */
      return !ptr_deref_may_alias_global_p (t);
    }
  /* Taking the address of a local or read-only object yields a
     qualifying pointer.  */
  if (TREE_CODE (t) == ADDR_EXPR)
    return refs_local_or_readonly_memory_p (TREE_OPERAND (t, 0));
  return false;
}
2593
2594
2595 /* Analyze function body for NODE.
2596 EARLY indicates run from early optimization pipeline. */
2597
2598 static void
analyze_function_body(struct cgraph_node * node,bool early)2599 analyze_function_body (struct cgraph_node *node, bool early)
2600 {
2601 sreal time = opt_for_fn (node->decl, param_uninlined_function_time);
2602 /* Estimate static overhead for function prologue/epilogue and alignment. */
2603 int size = opt_for_fn (node->decl, param_uninlined_function_insns);
2604 /* Benefits are scaled by probability of elimination that is in range
2605 <0,2>. */
2606 basic_block bb;
2607 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
2608 sreal freq;
2609 class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
2610 ipa_node_params *params_summary
2611 = early ? NULL : ipa_node_params_sum->get (node);
2612 ipa_predicate bb_predicate;
2613 struct ipa_func_body_info fbi;
2614 vec<ipa_predicate> nonconstant_names = vNULL;
2615 int nblocks, n;
2616 int *order;
2617 gimple *fix_builtin_expect_stmt;
2618
2619 gcc_assert (my_function && my_function->cfg);
2620 gcc_assert (cfun == my_function);
2621
2622 memset(&fbi, 0, sizeof(fbi));
2623 vec_free (info->conds);
2624 info->conds = NULL;
2625 info->size_time_table.release ();
2626 info->call_size_time_table.release ();
2627
2628 /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
2629 so we can produce proper inline hints.
2630
2631 When optimizing and analyzing for early inliner, initialize node params
2632 so we can produce correct BB predicates. */
2633
2634 if (opt_for_fn (node->decl, optimize))
2635 {
2636 calculate_dominance_info (CDI_DOMINATORS);
2637 calculate_dominance_info (CDI_POST_DOMINATORS);
2638 if (!early)
2639 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
2640 else
2641 {
2642 ipa_check_create_node_params ();
2643 ipa_initialize_node_params (node);
2644 }
2645
2646 if (ipa_node_params_sum)
2647 {
2648 fbi.node = node;
2649 fbi.info = ipa_node_params_sum->get (node);
2650 fbi.bb_infos = vNULL;
2651 fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun), true);
2652 fbi.param_count = count_formal_params (node->decl);
2653 fbi.aa_walk_budget = opt_for_fn (node->decl, param_ipa_max_aa_steps);
2654
2655 nonconstant_names.safe_grow_cleared
2656 (SSANAMES (my_function)->length (), true);
2657 }
2658 }
2659
2660 if (dump_file)
2661 fprintf (dump_file, "\nAnalyzing function body size: %s\n",
2662 node->dump_name ());
2663
2664 /* When we run into maximal number of entries, we assign everything to the
2665 constant truth case. Be sure to have it in list. */
2666 bb_predicate = true;
2667 info->account_size_time (0, 0, bb_predicate, bb_predicate);
2668
2669 bb_predicate = ipa_predicate::not_inlined ();
2670 info->account_size_time (opt_for_fn (node->decl,
2671 param_uninlined_function_insns)
2672 * ipa_fn_summary::size_scale,
2673 opt_for_fn (node->decl,
2674 param_uninlined_function_time),
2675 bb_predicate,
2676 bb_predicate);
2677
2678 if (fbi.info)
2679 compute_bb_predicates (&fbi, node, info, params_summary);
2680 const profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
2681 order = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
2682 nblocks = pre_and_rev_post_order_compute (NULL, order, false);
2683 for (n = 0; n < nblocks; n++)
2684 {
2685 bb = BASIC_BLOCK_FOR_FN (cfun, order[n]);
2686 freq = bb->count.to_sreal_scale (entry_count);
2687 if (clobber_only_eh_bb_p (bb))
2688 {
2689 if (dump_file && (dump_flags & TDF_DETAILS))
2690 fprintf (dump_file, "\n Ignoring BB %i;"
2691 " it will be optimized away by cleanup_clobbers\n",
2692 bb->index);
2693 continue;
2694 }
2695
2696 /* TODO: Obviously predicates can be propagated down across CFG. */
2697 if (fbi.info)
2698 {
2699 if (bb->aux)
2700 bb_predicate = *(ipa_predicate *)bb->aux;
2701 else
2702 bb_predicate = false;
2703 }
2704 else
2705 bb_predicate = true;
2706
2707 if (dump_file && (dump_flags & TDF_DETAILS))
2708 {
2709 fprintf (dump_file, "\n BB %i predicate:", bb->index);
2710 bb_predicate.dump (dump_file, info->conds);
2711 }
2712
2713 if (fbi.info && nonconstant_names.exists ())
2714 {
2715 ipa_predicate phi_predicate;
2716 bool first_phi = true;
2717
2718 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
2719 gsi_next (&bsi))
2720 {
2721 if (first_phi
2722 && !phi_result_unknown_predicate (&fbi, info,
2723 params_summary,
2724 bb,
2725 &phi_predicate,
2726 nonconstant_names))
2727 break;
2728 first_phi = false;
2729 if (dump_file && (dump_flags & TDF_DETAILS))
2730 {
2731 fprintf (dump_file, " ");
2732 print_gimple_stmt (dump_file, gsi_stmt (bsi), 0);
2733 }
2734 predicate_for_phi_result (info, bsi.phi (), &phi_predicate,
2735 nonconstant_names);
2736 }
2737 }
2738
2739 fix_builtin_expect_stmt = find_foldable_builtin_expect (bb);
2740
2741 for (gimple_stmt_iterator bsi = gsi_start_nondebug_bb (bb);
2742 !gsi_end_p (bsi); gsi_next_nondebug (&bsi))
2743 {
2744 gimple *stmt = gsi_stmt (bsi);
2745 int this_size = estimate_num_insns (stmt, &eni_size_weights);
2746 int this_time = estimate_num_insns (stmt, &eni_time_weights);
2747 int prob;
2748 ipa_predicate will_be_nonconstant;
2749
2750 /* This relation stmt should be folded after we remove
2751 __builtin_expect call. Adjust the cost here. */
2752 if (stmt == fix_builtin_expect_stmt)
2753 {
2754 this_size--;
2755 this_time--;
2756 }
2757
2758 if (dump_file && (dump_flags & TDF_DETAILS))
2759 {
2760 fprintf (dump_file, " ");
2761 print_gimple_stmt (dump_file, stmt, 0);
2762 fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n",
2763 freq.to_double (), this_size,
2764 this_time);
2765 }
2766
2767 if (is_gimple_call (stmt)
2768 && !gimple_call_internal_p (stmt))
2769 {
2770 struct cgraph_edge *edge = node->get_edge (stmt);
2771 ipa_call_summary *es = ipa_call_summaries->get_create (edge);
2772
2773 /* Special case: results of BUILT_IN_CONSTANT_P will be always
2774 resolved as constant. We however don't want to optimize
2775 out the cgraph edges. */
2776 if (nonconstant_names.exists ()
2777 && gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P)
2778 && gimple_call_lhs (stmt)
2779 && TREE_CODE (gimple_call_lhs (stmt)) == SSA_NAME)
2780 {
2781 ipa_predicate false_p = false;
2782 nonconstant_names[SSA_NAME_VERSION (gimple_call_lhs (stmt))]
2783 = false_p;
2784 }
2785 if (ipa_node_params_sum)
2786 {
2787 int count = gimple_call_num_args (stmt);
2788 int i;
2789
2790 if (count)
2791 es->param.safe_grow_cleared (count, true);
2792 for (i = 0; i < count; i++)
2793 {
2794 int prob = param_change_prob (&fbi, stmt, i);
2795 gcc_assert (prob >= 0 && prob <= REG_BR_PROB_BASE);
2796 es->param[i].change_prob = prob;
2797 es->param[i].points_to_local_or_readonly_memory
2798 = points_to_local_or_readonly_memory_p
2799 (gimple_call_arg (stmt, i));
2800 }
2801 }
2802 /* We cannot setup VLA parameters during inlining. */
2803 for (unsigned int i = 0; i < gimple_call_num_args (stmt); ++i)
2804 if (TREE_CODE (gimple_call_arg (stmt, i)) == WITH_SIZE_EXPR)
2805 {
2806 edge->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
2807 break;
2808 }
2809 es->call_stmt_size = this_size;
2810 es->call_stmt_time = this_time;
2811 es->loop_depth = bb_loop_depth (bb);
2812 edge_set_predicate (edge, &bb_predicate);
2813 if (edge->speculative)
2814 {
2815 cgraph_edge *indirect
2816 = edge->speculative_call_indirect_edge ();
2817 ipa_call_summary *es2
2818 = ipa_call_summaries->get_create (indirect);
2819 ipa_call_summaries->duplicate (edge, indirect,
2820 es, es2);
2821
2822 /* Edge is the first direct call.
2823 create and duplicate call summaries for multiple
2824 speculative call targets. */
2825 for (cgraph_edge *direct
2826 = edge->next_speculative_call_target ();
2827 direct;
2828 direct = direct->next_speculative_call_target ())
2829 {
2830 ipa_call_summary *es3
2831 = ipa_call_summaries->get_create (direct);
2832 ipa_call_summaries->duplicate (edge, direct,
2833 es, es3);
2834 }
2835 }
2836 }
2837
2838 /* TODO: When conditional jump or switch is known to be constant, but
2839 we did not translate it into the predicates, we really can account
2840 just maximum of the possible paths. */
2841 if (fbi.info)
2842 will_be_nonconstant
2843 = will_be_nonconstant_predicate (&fbi, info, params_summary,
2844 stmt, nonconstant_names);
2845 else
2846 will_be_nonconstant = true;
2847 if (this_time || this_size)
2848 {
2849 sreal final_time = (sreal)this_time * freq;
2850
2851 prob = eliminated_by_inlining_prob (&fbi, stmt);
2852 if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS))
2853 fprintf (dump_file,
2854 "\t\t50%% will be eliminated by inlining\n");
2855 if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS))
2856 fprintf (dump_file, "\t\tWill be eliminated by inlining\n");
2857
2858 ipa_predicate p = bb_predicate & will_be_nonconstant;
2859
2860 /* We can ignore statement when we proved it is never going
2861 to happen, but we cannot do that for call statements
2862 because edges are accounted specially. */
2863
2864 if (*(is_gimple_call (stmt) ? &bb_predicate : &p) != false)
2865 {
2866 time += final_time;
2867 size += this_size;
2868 }
2869
2870 /* We account everything but the calls. Calls have their own
2871 size/time info attached to cgraph edges. This is necessary
2872 in order to make the cost disappear after inlining. */
2873 if (!is_gimple_call (stmt))
2874 {
2875 if (prob)
2876 {
2877 ipa_predicate ip
2878 = bb_predicate & ipa_predicate::not_inlined ();
2879 info->account_size_time (this_size * prob,
2880 (final_time * prob) / 2, ip,
2881 p);
2882 }
2883 if (prob != 2)
2884 info->account_size_time (this_size * (2 - prob),
2885 (final_time * (2 - prob) / 2),
2886 bb_predicate,
2887 p);
2888 }
2889
2890 if (!info->fp_expressions && fp_expression_p (stmt))
2891 {
2892 info->fp_expressions = true;
2893 if (dump_file)
2894 fprintf (dump_file, " fp_expression set\n");
2895 }
2896 }
2897
2898 /* Account cost of address calculations in the statements. */
2899 for (unsigned int i = 0; i < gimple_num_ops (stmt); i++)
2900 {
2901 for (tree op = gimple_op (stmt, i);
2902 op && handled_component_p (op);
2903 op = TREE_OPERAND (op, 0))
2904 if ((TREE_CODE (op) == ARRAY_REF
2905 || TREE_CODE (op) == ARRAY_RANGE_REF)
2906 && TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME)
2907 {
2908 ipa_predicate p = bb_predicate;
2909 if (fbi.info)
2910 p = p & will_be_nonconstant_expr_predicate
2911 (&fbi, info, params_summary,
2912 TREE_OPERAND (op, 1),
2913 nonconstant_names);
2914 if (p != false)
2915 {
2916 time += freq;
2917 size += 1;
2918 if (dump_file)
2919 fprintf (dump_file,
2920 "\t\tAccounting address calculation.\n");
2921 info->account_size_time (ipa_fn_summary::size_scale,
2922 freq,
2923 bb_predicate,
2924 p);
2925 }
2926 }
2927 }
2928
2929 }
2930 }
2931 free (order);
2932
2933 if (nonconstant_names.exists () && !early)
2934 {
2935 ipa_fn_summary *s = ipa_fn_summaries->get (node);
2936 unsigned max_loop_predicates = opt_for_fn (node->decl,
2937 param_ipa_max_loop_predicates);
2938
2939 if (dump_file && (dump_flags & TDF_DETAILS))
2940 flow_loops_dump (dump_file, NULL, 0);
2941 scev_initialize ();
2942 for (auto loop : loops_list (cfun, 0))
2943 {
2944 ipa_predicate loop_iterations = true;
2945 sreal header_freq;
2946 edge ex;
2947 unsigned int j;
2948 class tree_niter_desc niter_desc;
2949 if (!loop->header->aux)
2950 continue;
2951
2952 profile_count phdr_count = loop_preheader_edge (loop)->count ();
2953 sreal phdr_freq = phdr_count.to_sreal_scale (entry_count);
2954
2955 bb_predicate = *(ipa_predicate *)loop->header->aux;
2956 auto_vec<edge> exits = get_loop_exit_edges (loop);
2957 FOR_EACH_VEC_ELT (exits, j, ex)
2958 if (number_of_iterations_exit (loop, ex, &niter_desc, false)
2959 && !is_gimple_min_invariant (niter_desc.niter))
2960 {
2961 ipa_predicate will_be_nonconstant
2962 = will_be_nonconstant_expr_predicate (&fbi, info,
2963 params_summary,
2964 niter_desc.niter,
2965 nonconstant_names);
2966 if (will_be_nonconstant != true)
2967 will_be_nonconstant = bb_predicate & will_be_nonconstant;
2968 if (will_be_nonconstant != true
2969 && will_be_nonconstant != false)
2970 loop_iterations &= will_be_nonconstant;
2971 }
2972 add_freqcounting_predicate (&s->loop_iterations, loop_iterations,
2973 phdr_freq, max_loop_predicates);
2974 }
2975
2976 /* To avoid quadratic behavior we analyze stride predicates only
2977 with respect to the containing loop. Thus we simply iterate
2978 over all defs in the outermost loop body. */
2979 for (class loop *loop = loops_for_fn (cfun)->tree_root->inner;
2980 loop != NULL; loop = loop->next)
2981 {
2982 ipa_predicate loop_stride = true;
2983 basic_block *body = get_loop_body (loop);
2984 profile_count phdr_count = loop_preheader_edge (loop)->count ();
2985 sreal phdr_freq = phdr_count.to_sreal_scale (entry_count);
2986 for (unsigned i = 0; i < loop->num_nodes; i++)
2987 {
2988 gimple_stmt_iterator gsi;
2989 if (!body[i]->aux)
2990 continue;
2991
2992 bb_predicate = *(ipa_predicate *)body[i]->aux;
2993 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);
2994 gsi_next (&gsi))
2995 {
2996 gimple *stmt = gsi_stmt (gsi);
2997
2998 if (!is_gimple_assign (stmt))
2999 continue;
3000
3001 tree def = gimple_assign_lhs (stmt);
3002 if (TREE_CODE (def) != SSA_NAME)
3003 continue;
3004
3005 affine_iv iv;
3006 if (!simple_iv (loop_containing_stmt (stmt),
3007 loop_containing_stmt (stmt),
3008 def, &iv, true)
3009 || is_gimple_min_invariant (iv.step))
3010 continue;
3011
3012 ipa_predicate will_be_nonconstant
3013 = will_be_nonconstant_expr_predicate (&fbi, info,
3014 params_summary,
3015 iv.step,
3016 nonconstant_names);
3017 if (will_be_nonconstant != true)
3018 will_be_nonconstant = bb_predicate & will_be_nonconstant;
3019 if (will_be_nonconstant != true
3020 && will_be_nonconstant != false)
3021 loop_stride = loop_stride & will_be_nonconstant;
3022 }
3023 }
3024 add_freqcounting_predicate (&s->loop_strides, loop_stride,
3025 phdr_freq, max_loop_predicates);
3026 free (body);
3027 }
3028 scev_finalize ();
3029 }
3030 FOR_ALL_BB_FN (bb, my_function)
3031 {
3032 edge e;
3033 edge_iterator ei;
3034
3035 if (bb->aux)
3036 edge_predicate_pool.remove ((ipa_predicate *)bb->aux);
3037 bb->aux = NULL;
3038 FOR_EACH_EDGE (e, ei, bb->succs)
3039 {
3040 if (e->aux)
3041 edge_predicate_pool.remove ((ipa_predicate *)e->aux);
3042 e->aux = NULL;
3043 }
3044 }
3045 ipa_fn_summary *s = ipa_fn_summaries->get (node);
3046 ipa_size_summary *ss = ipa_size_summaries->get (node);
3047 s->time = time;
3048 ss->self_size = size;
3049 nonconstant_names.release ();
3050 ipa_release_body_info (&fbi);
3051 if (opt_for_fn (node->decl, optimize))
3052 {
3053 if (!early)
3054 loop_optimizer_finalize ();
3055 else if (!ipa_edge_args_sum)
3056 ipa_free_all_node_params ();
3057 free_dominance_info (CDI_DOMINATORS);
3058 free_dominance_info (CDI_POST_DOMINATORS);
3059 }
3060 if (dump_file)
3061 {
3062 fprintf (dump_file, "\n");
3063 ipa_dump_fn_summary (dump_file, node);
3064 }
3065 }
3066
3067
/* Compute function summary.
   EARLY is true when we compute parameters during early opts.

   (Re)builds the ipa_fn_summary and ipa_size_summary of NODE from scratch:
   estimates stack frame size, decides inlinability and whether the signature
   may be changed by IPA transforms, and fills the size/time tables either
   synthetically (for thunks) or by analyzing the function body.  */

void
compute_fn_summary (struct cgraph_node *node, bool early)
{
  HOST_WIDE_INT self_stack_size;
  struct cgraph_edge *e;

  /* Summaries are computed only for offline bodies; inline clones are
     handled by merging during inlining.  */
  gcc_assert (!node->inlined_to);

  if (!ipa_fn_summaries)
    ipa_fn_summary_alloc ();

  /* Create a new ipa_fn_summary.  Drop any stale data for NODE and its
     call edges first so get_create starts from a clean slate.  */
  ((ipa_fn_summary_t *)ipa_fn_summaries)->remove_callees (node);
  ipa_fn_summaries->remove (node);
  class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
  class ipa_size_summary *size_info = ipa_size_summaries->get_create (node);

  /* Estimate the stack size for the function if we're optimizing.  */
  self_stack_size = optimize && !node->thunk
		    ? estimated_stack_frame_size (node) : 0;
  size_info->estimated_self_stack_size = self_stack_size;
  info->estimated_stack_size = self_stack_size;

  if (node->thunk)
    {
      /* Thunks have no analyzable body; synthesize a summary from the
	 tunable per-thunk size/time parameters instead.  */
      ipa_call_summary *es = ipa_call_summaries->get_create (node->callees);
      ipa_predicate t = true;

      node->can_change_signature = false;
      es->call_stmt_size = eni_size_weights.call_cost;
      es->call_stmt_time = eni_time_weights.call_cost;
      info->account_size_time (ipa_fn_summary::size_scale
			       * opt_for_fn (node->decl,
				 param_uninlined_function_thunk_insns),
			       opt_for_fn (node->decl,
				 param_uninlined_function_thunk_time), t, t);
      /* Extra cost paid only when the thunk stays out of line.  */
      t = ipa_predicate::not_inlined ();
      info->account_size_time (2 * ipa_fn_summary::size_scale, 0, t, t);
      ipa_update_overall_fn_summary (node);
      size_info->self_size = size_info->size;
      if (stdarg_p (TREE_TYPE (node->decl)))
	{
	  info->inlinable = false;
	  node->callees->inline_failed = CIF_VARIADIC_THUNK;
	}
      else
	info->inlinable = true;
    }
  else
    {
      /* Even is_gimple_min_invariant relies on current_function_decl.  */
      push_cfun (DECL_STRUCT_FUNCTION (node->decl));

      /* During IPA profile merging we may be called w/o virtual SSA form
	 built.  */
      update_ssa (TODO_update_ssa_only_virtuals);

      /* Can this function be inlined at all?  */
      if (!opt_for_fn (node->decl, optimize)
	  && !lookup_attribute ("always_inline",
				DECL_ATTRIBUTES (node->decl)))
	info->inlinable = false;
      else
	info->inlinable = tree_inlinable_function_p (node->decl);

      bool no_signature = false;
      /* Type attributes can use parameter indices to describe them.
	 Special case fn spec since we can safely preserve them in
	 modref summaries.  */
      for (tree list = TYPE_ATTRIBUTES (TREE_TYPE (node->decl));
	   list && !no_signature; list = TREE_CHAIN (list))
	if (!ipa_param_adjustments::type_attribute_allowed_p
			(get_attribute_name (list)))
	  {
	    if (dump_file)
	      {
		fprintf (dump_file, "No signature change:"
			 " function type has unhandled attribute %s.\n",
			 IDENTIFIER_POINTER (get_attribute_name (list)));
	      }
	    no_signature = true;
	  }
      /* Variably-modified parameter types reference other parameters, so
	 dropping or reordering arguments would break them.  */
      for (tree parm = DECL_ARGUMENTS (node->decl);
	   parm && !no_signature; parm = DECL_CHAIN (parm))
	if (variably_modified_type_p (TREE_TYPE (parm), node->decl))
	  {
	    if (dump_file)
	      {
		fprintf (dump_file, "No signature change:"
			 " has parameter with variably modified type.\n");
	      }
	    no_signature = true;
	  }

      /* Likewise for #pragma omp declare simd functions or functions
	 with simd attribute.  */
      if (no_signature
	  || lookup_attribute ("omp declare simd",
			       DECL_ATTRIBUTES (node->decl)))
	node->can_change_signature = false;
      else
	{
	  /* Otherwise, inlinable functions always can change signature.  */
	  if (info->inlinable)
	    node->can_change_signature = true;
	  else
	    {
	      /* Functions calling builtin_apply cannot change signature.  */
	      for (e = node->callees; e; e = e->next_callee)
		{
		  tree cdecl = e->callee->decl;
		  if (fndecl_built_in_p (cdecl, BUILT_IN_APPLY_ARGS)
		      || fndecl_built_in_p (cdecl, BUILT_IN_VA_START))
		    break;
		}
	      node->can_change_signature = !e;
	    }
	}
      analyze_function_body (node, early);
      pop_cfun ();
    }

  /* Inlining characteristics are maintained by the cgraph_mark_inline.  */
  size_info->size = size_info->self_size;
  info->estimated_stack_size = size_info->estimated_self_stack_size;

  /* Code above should compute exactly the same result as
     ipa_update_overall_fn_summary except for case when speculative
     edges are present since these are accounted to size but not
     self_size.  Do not compare time since different order the roundoff
     errors result in slight changes.  */
  ipa_update_overall_fn_summary (node);
  if (flag_checking)
    {
      for (e = node->indirect_calls; e; e = e->next_callee)
	if (e->speculative)
	  break;
      gcc_assert (e || size_info->size == size_info->self_size);
    }
}
3211
3212
3213 /* Compute parameters of functions used by inliner using
3214 current_function_decl. */
3215
3216 static unsigned int
compute_fn_summary_for_current(void)3217 compute_fn_summary_for_current (void)
3218 {
3219 compute_fn_summary (cgraph_node::get (current_function_decl), true);
3220 return 0;
3221 }
3222
3223 /* Estimate benefit devirtualizing indirect edge IE and return true if it can
3224 be devirtualized and inlined, provided m_known_vals, m_known_contexts and
3225 m_known_aggs in AVALS. Return false straight away if AVALS is NULL. */
3226
3227 static bool
estimate_edge_devirt_benefit(struct cgraph_edge * ie,int * size,int * time,ipa_call_arg_values * avals)3228 estimate_edge_devirt_benefit (struct cgraph_edge *ie,
3229 int *size, int *time,
3230 ipa_call_arg_values *avals)
3231 {
3232 tree target;
3233 struct cgraph_node *callee;
3234 class ipa_fn_summary *isummary;
3235 enum availability avail;
3236 bool speculative;
3237
3238 if (!avals
3239 || (!avals->m_known_vals.length() && !avals->m_known_contexts.length ()))
3240 return false;
3241 if (!opt_for_fn (ie->caller->decl, flag_indirect_inlining))
3242 return false;
3243
3244 target = ipa_get_indirect_edge_target (ie, avals, &speculative);
3245 if (!target || speculative)
3246 return false;
3247
3248 /* Account for difference in cost between indirect and direct calls. */
3249 *size -= (eni_size_weights.indirect_call_cost - eni_size_weights.call_cost);
3250 *time -= (eni_time_weights.indirect_call_cost - eni_time_weights.call_cost);
3251 gcc_checking_assert (*time >= 0);
3252 gcc_checking_assert (*size >= 0);
3253
3254 callee = cgraph_node::get (target);
3255 if (!callee || !callee->definition)
3256 return false;
3257 callee = callee->function_symbol (&avail);
3258 if (avail < AVAIL_AVAILABLE)
3259 return false;
3260 isummary = ipa_fn_summaries->get (callee);
3261 if (isummary == NULL)
3262 return false;
3263
3264 return isummary->inlinable;
3265 }
3266
3267 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
3268 handle edge E with probability PROB. Set HINTS accordingly if edge may be
3269 devirtualized. AVALS, if non-NULL, describes the context of the call site
3270 as far as values of parameters are concerened. */
3271
3272 static inline void
estimate_edge_size_and_time(struct cgraph_edge * e,int * size,int * min_size,sreal * time,ipa_call_arg_values * avals,ipa_hints * hints)3273 estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size,
3274 sreal *time, ipa_call_arg_values *avals,
3275 ipa_hints *hints)
3276 {
3277 class ipa_call_summary *es = ipa_call_summaries->get (e);
3278 int call_size = es->call_stmt_size;
3279 int call_time = es->call_stmt_time;
3280 int cur_size;
3281
3282 if (!e->callee && hints && e->maybe_hot_p ()
3283 && estimate_edge_devirt_benefit (e, &call_size, &call_time, avals))
3284 *hints |= INLINE_HINT_indirect_call;
3285 cur_size = call_size * ipa_fn_summary::size_scale;
3286 *size += cur_size;
3287 if (min_size)
3288 *min_size += cur_size;
3289 if (time)
3290 *time += ((sreal)call_time) * e->sreal_frequency ();
3291 }
3292
3293
3294 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
3295 calls in NODE. POSSIBLE_TRUTHS and AVALS describe the context of the call
3296 site.
3297
3298 Helper for estimate_calls_size_and_time which does the same but
3299 (in most cases) faster. */
3300
3301 static void
estimate_calls_size_and_time_1(struct cgraph_node * node,int * size,int * min_size,sreal * time,ipa_hints * hints,clause_t possible_truths,ipa_call_arg_values * avals)3302 estimate_calls_size_and_time_1 (struct cgraph_node *node, int *size,
3303 int *min_size, sreal *time,
3304 ipa_hints *hints,
3305 clause_t possible_truths,
3306 ipa_call_arg_values *avals)
3307 {
3308 struct cgraph_edge *e;
3309 for (e = node->callees; e; e = e->next_callee)
3310 {
3311 if (!e->inline_failed)
3312 {
3313 gcc_checking_assert (!ipa_call_summaries->get (e));
3314 estimate_calls_size_and_time_1 (e->callee, size, min_size, time,
3315 hints, possible_truths, avals);
3316
3317 continue;
3318 }
3319 class ipa_call_summary *es = ipa_call_summaries->get (e);
3320
3321 /* Do not care about zero sized builtins. */
3322 if (!es->call_stmt_size)
3323 {
3324 gcc_checking_assert (!es->call_stmt_time);
3325 continue;
3326 }
3327 if (!es->predicate
3328 || es->predicate->evaluate (possible_truths))
3329 {
3330 /* Predicates of calls shall not use NOT_CHANGED codes,
3331 so we do not need to compute probabilities. */
3332 estimate_edge_size_and_time (e, size,
3333 es->predicate ? NULL : min_size,
3334 time, avals, hints);
3335 }
3336 }
3337 for (e = node->indirect_calls; e; e = e->next_callee)
3338 {
3339 class ipa_call_summary *es = ipa_call_summaries->get (e);
3340 if (!es->predicate
3341 || es->predicate->evaluate (possible_truths))
3342 estimate_edge_size_and_time (e, size,
3343 es->predicate ? NULL : min_size,
3344 time, avals, hints);
3345 }
3346 }
3347
3348 /* Populate sum->call_size_time_table for edges from NODE. */
3349
3350 static void
summarize_calls_size_and_time(struct cgraph_node * node,ipa_fn_summary * sum)3351 summarize_calls_size_and_time (struct cgraph_node *node,
3352 ipa_fn_summary *sum)
3353 {
3354 struct cgraph_edge *e;
3355 for (e = node->callees; e; e = e->next_callee)
3356 {
3357 if (!e->inline_failed)
3358 {
3359 gcc_checking_assert (!ipa_call_summaries->get (e));
3360 summarize_calls_size_and_time (e->callee, sum);
3361 continue;
3362 }
3363 int size = 0;
3364 sreal time = 0;
3365
3366 estimate_edge_size_and_time (e, &size, NULL, &time, NULL, NULL);
3367
3368 ipa_predicate pred = true;
3369 class ipa_call_summary *es = ipa_call_summaries->get (e);
3370
3371 if (es->predicate)
3372 pred = *es->predicate;
3373 sum->account_size_time (size, time, pred, pred, true);
3374 }
3375 for (e = node->indirect_calls; e; e = e->next_callee)
3376 {
3377 int size = 0;
3378 sreal time = 0;
3379
3380 estimate_edge_size_and_time (e, &size, NULL, &time, NULL, NULL);
3381 ipa_predicate pred = true;
3382 class ipa_call_summary *es = ipa_call_summaries->get (e);
3383
3384 if (es->predicate)
3385 pred = *es->predicate;
3386 sum->account_size_time (size, time, pred, pred, true);
3387 }
3388 }
3389
/* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
   calls in NODE.  POSSIBLE_TRUTHS and AVALS (the latter if non-NULL) describe
   context of the call site.

   Uses the cached call_size_time_table when it can give a precise answer
   (fast path), otherwise walks all call edges recursively via
   estimate_calls_size_and_time_1 (slow path).  */

static void
estimate_calls_size_and_time (struct cgraph_node *node, int *size,
			      int *min_size, sreal *time,
			      ipa_hints *hints,
			      clause_t possible_truths,
			      ipa_call_arg_values *avals)
{
  class ipa_fn_summary *sum = ipa_fn_summaries->get (node);
  bool use_table = true;

  gcc_assert (node->callees || node->indirect_calls);

  /* During early inlining we do not calculate info for very
     large functions and thus there is no need for producing
     summaries.  */
  if (!ipa_node_params_sum)
    use_table = false;
  /* Do not calculate summaries for simple wrappers; it is waste
     of memory.  */
  else if (node->callees && node->indirect_calls
	   && node->callees->inline_failed && !node->callees->next_callee)
    use_table = false;
  /* If there is an indirect edge that may be optimized, we need
     to go the slow way.  */
  else if (avals && hints
	   && (avals->m_known_vals.length ()
	       || avals->m_known_contexts.length ()
	       || avals->m_known_aggs.length ()))
    {
      ipa_node_params *params_summary = ipa_node_params_sum->get (node);
      unsigned int nargs = params_summary
			   ? ipa_get_param_count (params_summary) : 0;

      /* The table only records predicates; a parameter with a known value
	 or context feeding an indirect/polymorphic call may enable
	 devirtualization the table cannot express.  */
      for (unsigned int i = 0; i < nargs && use_table; i++)
	{
	  if (ipa_is_param_used_by_indirect_call (params_summary, i)
	      && (avals->safe_sval_at (i)
		  || (avals->m_known_aggs.length () > i
		      && avals->m_known_aggs[i].items.length ())))
	    use_table = false;
	  else if (ipa_is_param_used_by_polymorphic_call (params_summary, i)
		   && (avals->m_known_contexts.length () > i
		       && !avals->m_known_contexts[i].useless_p ()))
	    use_table = false;
	}
    }

  /* Fast path is via the call size time table.  */
  if (use_table)
    {
      /* Build summary if it is absent.  */
      if (!sum->call_size_time_table.length ())
	{
	  /* Entry 0 holds the unconditional part and backs min_size.  */
	  ipa_predicate true_pred = true;
	  sum->account_size_time (0, 0, true_pred, true_pred, true);
	  summarize_calls_size_and_time (node, sum);
	}

      int old_size = *size;
      sreal old_time = time ? *time : 0;

      if (min_size)
	*min_size += sum->call_size_time_table[0].size;

      unsigned int i;
      size_time_entry *e;

      /* Walk the table and account sizes and times.  */
      for (i = 0; sum->call_size_time_table.iterate (i, &e);
	   i++)
	if (e->exec_predicate.evaluate (possible_truths))
	  {
	    *size += e->size;
	    if (time)
	      *time += e->time;
	  }

      /* Be careful and see if both methods agree.  */
      if ((flag_checking || dump_file)
	  /* Do not try to sanity check when we know we lost some
	     precision.  */
	  && sum->call_size_time_table.length ()
	     < ipa_fn_summary::max_size_time_table_size)
	{
	  estimate_calls_size_and_time_1 (node, &old_size, NULL, &old_time, NULL,
					  possible_truths, avals);
	  gcc_assert (*size == old_size);
	  if (time && (*time - old_time > 1 || *time - old_time < -1)
	      && dump_file)
	    fprintf (dump_file, "Time mismatch in call summary %f!=%f\n",
		     old_time.to_double (),
		     time->to_double ());
	}
    }
  /* Slow path by walking all edges.  */
  else
    estimate_calls_size_and_time_1 (node, size, min_size, time, hints,
				    possible_truths, avals);
}
3493
/* Main constructor for ipa call context.  Memory allocation of ARG_VALUES
   is owned by the caller.  INLINE_PARAM_SUMMARY is also owned by the
   caller.

   The context records NODE together with the clauses of conditions known
   to be true (POSSIBLE_TRUTHS, and NONSPEC_POSSIBLE_TRUTHS for the
   non-specialized copy); no data is copied here — use
   ipa_cached_call_context::duplicate_from for an owning copy.  */

ipa_call_context::ipa_call_context (cgraph_node *node, clause_t possible_truths,
				    clause_t nonspec_possible_truths,
				    vec<inline_param_summary>
				      inline_param_summary,
				    ipa_auto_call_arg_values *arg_values)
: m_node (node), m_possible_truths (possible_truths),
  m_nonspec_possible_truths (nonspec_possible_truths),
  m_inline_param_summary (inline_param_summary),
  m_avals (arg_values)
{
}
3509
/* Set THIS to be a duplicate of CTX.  Copy all relevant info.

   Each vector is copied only when it contains at least one entry that can
   actually influence the estimates (i.e. the corresponding parameter is
   used by predicates, indirect calls or polymorphic calls); otherwise the
   member is left as vNULL to save memory.  The copies made here are owned
   by THIS and must be freed with release ().  */

void
ipa_cached_call_context::duplicate_from (const ipa_call_context &ctx)
{
  m_node = ctx.m_node;
  m_possible_truths = ctx.m_possible_truths;
  m_nonspec_possible_truths = ctx.m_nonspec_possible_truths;
  ipa_node_params *params_summary = ipa_node_params_sum->get (m_node);
  unsigned int nargs = params_summary
		       ? ipa_get_param_count (params_summary) : 0;

  m_inline_param_summary = vNULL;
  /* Copy the info only if there is at least one useful entry.  */
  if (ctx.m_inline_param_summary.exists ())
    {
      unsigned int n = MIN (ctx.m_inline_param_summary.length (), nargs);

      for (unsigned int i = 0; i < n; i++)
	if (ipa_is_param_used_by_ipa_predicates (params_summary, i)
	    && !ctx.m_inline_param_summary[i].useless_p ())
	  {
	    m_inline_param_summary
		    = ctx.m_inline_param_summary.copy ();
	    break;
	  }
    }
  m_avals.m_known_vals = vNULL;
  /* Likewise copy known constant values only when some of them is used
     by an indirect call.  */
  if (ctx.m_avals.m_known_vals.exists ())
    {
      unsigned int n = MIN (ctx.m_avals.m_known_vals.length (), nargs);

      for (unsigned int i = 0; i < n; i++)
	if (ipa_is_param_used_by_indirect_call (params_summary, i)
	    && ctx.m_avals.m_known_vals[i])
	  {
	    m_avals.m_known_vals = ctx.m_avals.m_known_vals.copy ();
	    break;
	  }
    }

  m_avals.m_known_contexts = vNULL;
  /* Polymorphic call contexts matter only for polymorphic calls.  */
  if (ctx.m_avals.m_known_contexts.exists ())
    {
      unsigned int n = MIN (ctx.m_avals.m_known_contexts.length (), nargs);

      for (unsigned int i = 0; i < n; i++)
	if (ipa_is_param_used_by_polymorphic_call (params_summary, i)
	    && !ctx.m_avals.m_known_contexts[i].useless_p ())
	  {
	    m_avals.m_known_contexts = ctx.m_avals.m_known_contexts.copy ();
	    break;
	  }
    }

  m_avals.m_known_aggs = vNULL;
  /* Aggregate values need a deep copy; ipa_copy_agg_values duplicates the
     per-parameter item vectors as well.  */
  if (ctx.m_avals.m_known_aggs.exists ())
    {
      unsigned int n = MIN (ctx.m_avals.m_known_aggs.length (), nargs);

      for (unsigned int i = 0; i < n; i++)
	if (ipa_is_param_used_by_indirect_call (params_summary, i)
	    && !ctx.m_avals.m_known_aggs[i].is_empty ())
	  {
	    m_avals.m_known_aggs
	      = ipa_copy_agg_values (ctx.m_avals.m_known_aggs);
	    break;
	  }
    }

  /* Value ranges are never cached.  */
  m_avals.m_known_value_ranges = vNULL;
}
3582
/* Release memory used by known_vals/contexts/aggs vectors and
   inline_param_summary.  Counterpart of duplicate_from, which made the
   owning copies.  */

void
ipa_cached_call_context::release ()
{
  /* See if context is initialized at first place.  */
  if (!m_node)
    return;
  /* Aggregate values own their item vectors, hence the deep release.  */
  ipa_release_agg_values (m_avals.m_known_aggs, true);
  m_avals.m_known_vals.release ();
  m_avals.m_known_contexts.release ();
  m_inline_param_summary.release ();
}
3597
/* Return true if CTX describes the same call context as THIS.

   Comparison is semantic rather than structural: a missing vector entry is
   treated the same as a present-but-useless one, and entries for parameters
   that cannot influence the estimates (not used by predicates, indirect or
   polymorphic calls) are ignored entirely.  */

bool
ipa_call_context::equal_to (const ipa_call_context &ctx)
{
  if (m_node != ctx.m_node
      || m_possible_truths != ctx.m_possible_truths
      || m_nonspec_possible_truths != ctx.m_nonspec_possible_truths)
    return false;

  ipa_node_params *params_summary = ipa_node_params_sum->get (m_node);
  unsigned int nargs = params_summary
		       ? ipa_get_param_count (params_summary) : 0;

  /* Compare inline parameter summaries, treating out-of-range indices as
     useless entries.  */
  if (m_inline_param_summary.exists () || ctx.m_inline_param_summary.exists ())
    {
      for (unsigned int i = 0; i < nargs; i++)
	{
	  if (!ipa_is_param_used_by_ipa_predicates (params_summary, i))
	    continue;
	  if (i >= m_inline_param_summary.length ()
	      || m_inline_param_summary[i].useless_p ())
	    {
	      if (i < ctx.m_inline_param_summary.length ()
		  && !ctx.m_inline_param_summary[i].useless_p ())
		return false;
	      continue;
	    }
	  if (i >= ctx.m_inline_param_summary.length ()
	      || ctx.m_inline_param_summary[i].useless_p ())
	    {
	      if (i < m_inline_param_summary.length ()
		  && !m_inline_param_summary[i].useless_p ())
		return false;
	      continue;
	    }
	  if (!m_inline_param_summary[i].equal_to
	     	 (ctx.m_inline_param_summary[i]))
	    return false;
	}
    }
  /* Compare known constant values (pointer identity of trees is enough
     since constants are shared).  */
  if (m_avals.m_known_vals.exists () || ctx.m_avals.m_known_vals.exists ())
    {
      for (unsigned int i = 0; i < nargs; i++)
	{
	  if (!ipa_is_param_used_by_indirect_call (params_summary, i))
	    continue;
	  if (i >= m_avals.m_known_vals.length () || !m_avals.m_known_vals[i])
	    {
	      if (i < ctx.m_avals.m_known_vals.length ()
		  && ctx.m_avals.m_known_vals[i])
		return false;
	      continue;
	    }
	  if (i >= ctx.m_avals.m_known_vals.length ()
	      || !ctx.m_avals.m_known_vals[i])
	    {
	      if (i < m_avals.m_known_vals.length () && m_avals.m_known_vals[i])
		return false;
	      continue;
	    }
	  if (m_avals.m_known_vals[i] != ctx.m_avals.m_known_vals[i])
	    return false;
	}
    }
  /* Compare polymorphic call contexts.  */
  if (m_avals.m_known_contexts.exists ()
      || ctx.m_avals.m_known_contexts.exists ())
    {
      for (unsigned int i = 0; i < nargs; i++)
	{
	  if (!ipa_is_param_used_by_polymorphic_call (params_summary, i))
	    continue;
	  if (i >= m_avals.m_known_contexts.length ()
	      || m_avals.m_known_contexts[i].useless_p ())
	    {
	      if (i < ctx.m_avals.m_known_contexts.length ()
		  && !ctx.m_avals.m_known_contexts[i].useless_p ())
		return false;
	      continue;
	    }
	  if (i >= ctx.m_avals.m_known_contexts.length ()
	      || ctx.m_avals.m_known_contexts[i].useless_p ())
	    {
	      if (i < m_avals.m_known_contexts.length ()
		  && !m_avals.m_known_contexts[i].useless_p ())
		return false;
	      continue;
	    }
	  if (!m_avals.m_known_contexts[i].equal_to
	     	 (ctx.m_avals.m_known_contexts[i]))
	    return false;
	}
    }
  /* Compare known aggregate values.  */
  if (m_avals.m_known_aggs.exists () || ctx.m_avals.m_known_aggs.exists ())
    {
      for (unsigned int i = 0; i < nargs; i++)
	{
	  if (!ipa_is_param_used_by_indirect_call (params_summary, i))
	    continue;
	  if (i >= m_avals.m_known_aggs.length ()
	      || m_avals.m_known_aggs[i].is_empty ())
	    {
	      if (i < ctx.m_avals.m_known_aggs.length ()
		  && !ctx.m_avals.m_known_aggs[i].is_empty ())
		return false;
	      continue;
	    }
	  if (i >= ctx.m_avals.m_known_aggs.length ()
	      || ctx.m_avals.m_known_aggs[i].is_empty ())
	    {
	      if (i < m_avals.m_known_aggs.length ()
		  && !m_avals.m_known_aggs[i].is_empty ())
		return false;
	      continue;
	    }
	  if (!m_avals.m_known_aggs[i].equal_to (ctx.m_avals.m_known_aggs[i]))
	    return false;
	}
    }
  return true;
}
3719
/* Fill in the selected fields in ESTIMATES with value estimated for call in
   this context.  Always compute size and min_size.  Only compute time and
   nonspecialized_time if EST_TIMES is true.  Only compute hints if EST_HINTS
   is true.  */

void
ipa_call_context::estimate_size_and_time (ipa_call_estimates *estimates,
					  bool est_times, bool est_hints)
{
  class ipa_fn_summary *info = ipa_fn_summaries->get (m_node);
  size_time_entry *e;
  int size = 0;
  sreal time = 0;
  int min_size = 0;
  ipa_hints hints = 0;
  sreal loops_with_known_iterations = 0;
  sreal loops_with_known_strides = 0;
  int i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bool found = false;
      fprintf (dump_file, "   Estimating body: %s\n"
	       "   Known to be false: ", m_node->dump_name ());

      for (i = ipa_predicate::not_inlined_condition;
	   i < (ipa_predicate::first_dynamic_condition
		+ (int) vec_safe_length (info->conds)); i++)
	if (!(m_possible_truths & (1 << i)))
	  {
	    if (found)
	      fprintf (dump_file, ", ");
	    found = true;
	    dump_condition (dump_file, info->conds, i);
	  }
    }

  /* Account calls first; the function body's own size/time table is walked
     below.  */
  if (m_node->callees || m_node->indirect_calls)
    estimate_calls_size_and_time (m_node, &size, &min_size,
				  est_times ? &time : NULL,
				  est_hints ? &hints : NULL, m_possible_truths,
				  &m_avals);

  sreal nonspecialized_time = time;

  /* Entry 0 of the table holds the unconditional part of the function.  */
  min_size += info->size_time_table[0].size;
  for (i = 0; info->size_time_table.iterate (i, &e); i++)
    {
      bool exec = e->exec_predicate.evaluate (m_nonspec_possible_truths);

      /* Because predicates are conservative, it can happen that nonconst is 1
	 but exec is 0.  */
      if (exec)
	{
	  bool nonconst = e->nonconst_predicate.evaluate (m_possible_truths);

	  gcc_checking_assert (e->time >= 0);
	  gcc_checking_assert (time >= 0);

	  /* We compute specialized size only because size of nonspecialized
	     copy is context independent.

	     The difference between nonspecialized execution and specialized is
	     that nonspecialized is not going to have optimized out computations
	     known to be constant in a specialized setting.  */
	  if (nonconst)
	    size += e->size;
	  if (!est_times)
	    continue;
	  nonspecialized_time += e->time;
	  if (!nonconst)
	    ;
	  else if (!m_inline_param_summary.exists ())
	    {
	      /* NOTE: nonconst is necessarily true in this branch.  */
	      if (nonconst)
		time += e->time;
	    }
	  else
	    {
	      /* With inline parameter summaries available, scale the entry's
		 time by the probability its predicate remains true.  */
	      int prob = e->nonconst_predicate.probability
					       (info->conds, m_possible_truths,
					        m_inline_param_summary);
	      gcc_checking_assert (prob >= 0);
	      gcc_checking_assert (prob <= REG_BR_PROB_BASE);
	      if (prob == REG_BR_PROB_BASE)
	        time += e->time;
	      else
	        time += e->time * prob / REG_BR_PROB_BASE;
	    }
	  gcc_checking_assert (time >= 0);
	}
     }
  gcc_checking_assert (info->size_time_table[0].exec_predicate == true);
  gcc_checking_assert (info->size_time_table[0].nonconst_predicate == true);
  gcc_checking_assert (min_size >= 0);
  gcc_checking_assert (size >= 0);
  gcc_checking_assert (time >= 0);
  /* nonspecialized_time should be always bigger than specialized time.
     Roundoff issues however may get into the way.  */
  gcc_checking_assert ((nonspecialized_time - time * 99 / 100) >= -1);

  /* Roundoff issues may make specialized time bigger than nonspecialized
     time.  We do not really want that to happen because some heuristics
     may get confused by seeing negative speedups.  */
  if (time > nonspecialized_time)
    time = nonspecialized_time;

  if (est_hints)
    {
      if (info->scc_no)
	hints |= INLINE_HINT_in_scc;
      if (DECL_DECLARED_INLINE_P (m_node->decl))
	hints |= INLINE_HINT_declared_inline;
      if (info->builtin_constant_p_parms.length ()
	  && DECL_DECLARED_INLINE_P (m_node->decl))
	hints |= INLINE_HINT_builtin_constant_p;

      /* A loop predicate known false in this context means its iteration
	 count (resp. stride) becomes known after specialization.  */
      ipa_freqcounting_predicate *fcp;
      for (i = 0; vec_safe_iterate (info->loop_iterations, i, &fcp); i++)
	if (!fcp->predicate->evaluate (m_possible_truths))
	  {
	    hints |= INLINE_HINT_loop_iterations;
	    loops_with_known_iterations += fcp->freq;
	  }
      estimates->loops_with_known_iterations = loops_with_known_iterations;

      for (i = 0; vec_safe_iterate (info->loop_strides, i, &fcp); i++)
	if (!fcp->predicate->evaluate (m_possible_truths))
	  {
	    hints |= INLINE_HINT_loop_stride;
	    loops_with_known_strides += fcp->freq;
	  }
      estimates->loops_with_known_strides = loops_with_known_strides;
    }

  /* Scale back from the internal fixed-point representation.  */
  size = RDIV (size, ipa_fn_summary::size_scale);
  min_size = RDIV (min_size, ipa_fn_summary::size_scale);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\n   size:%i", (int) size);
      if (est_times)
	fprintf (dump_file, " time:%f nonspec time:%f",
		 time.to_double (), nonspecialized_time.to_double ());
      if (est_hints)
	fprintf (dump_file, " loops with known iterations:%f "
		 "known strides:%f", loops_with_known_iterations.to_double (),
		 loops_with_known_strides.to_double ());
      fprintf (dump_file, "\n");
    }
  if (est_times)
    {
      estimates->time = time;
      estimates->nonspecialized_time = nonspecialized_time;
    }
  estimates->size = size;
  estimates->min_size = min_size;
  if (est_hints)
    estimates->hints = hints;
  return;
}
3881
3882
3883 /* Estimate size and time needed to execute callee of EDGE assuming that
3884 parameters known to be constant at caller of EDGE are propagated.
3885 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
3886 and types for parameters. */
3887
3888 void
estimate_ipcp_clone_size_and_time(struct cgraph_node * node,ipa_auto_call_arg_values * avals,ipa_call_estimates * estimates)3889 estimate_ipcp_clone_size_and_time (struct cgraph_node *node,
3890 ipa_auto_call_arg_values *avals,
3891 ipa_call_estimates *estimates)
3892 {
3893 clause_t clause, nonspec_clause;
3894
3895 evaluate_conditions_for_known_args (node, false, avals, &clause,
3896 &nonspec_clause);
3897 ipa_call_context ctx (node, clause, nonspec_clause, vNULL, avals);
3898 ctx.estimate_size_and_time (estimates);
3899 }
3900
3901 /* Return stack frame offset where frame of NODE is supposed to start inside
3902 of the function it is inlined to.
3903 Return 0 for functions that are not inlined. */
3904
3905 HOST_WIDE_INT
ipa_get_stack_frame_offset(struct cgraph_node * node)3906 ipa_get_stack_frame_offset (struct cgraph_node *node)
3907 {
3908 HOST_WIDE_INT offset = 0;
3909 if (!node->inlined_to)
3910 return 0;
3911 node = node->callers->caller;
3912 while (true)
3913 {
3914 offset += ipa_size_summaries->get (node)->estimated_self_stack_size;
3915 if (!node->inlined_to)
3916 return offset;
3917 node = node->callers->caller;
3918 }
3919 }
3920
3921
3922 /* Update summary information of inline clones after inlining.
3923 Compute peak stack usage. */
3924
3925 static void
inline_update_callee_summaries(struct cgraph_node * node,int depth)3926 inline_update_callee_summaries (struct cgraph_node *node, int depth)
3927 {
3928 struct cgraph_edge *e;
3929
3930 ipa_propagate_frequency (node);
3931 for (e = node->callees; e; e = e->next_callee)
3932 {
3933 if (!e->inline_failed)
3934 inline_update_callee_summaries (e->callee, depth);
3935 else
3936 ipa_call_summaries->get (e)->loop_depth += depth;
3937 }
3938 for (e = node->indirect_calls; e; e = e->next_callee)
3939 ipa_call_summaries->get (e)->loop_depth += depth;
3940 }
3941
/* Update change_prob and points_to_local_or_readonly_memory of EDGE after
   INLINED_EDGE has been inlined.

   When function A is inlined in B and A calls C with parameter that
   changes with probability PROB1 and C is known to be passthrough
   of argument of B that changes with probability PROB2, the probability
   of change is now PROB1*PROB2.  */

static void
remap_edge_params (struct cgraph_edge *inlined_edge,
		   struct cgraph_edge *edge)
{
  if (ipa_node_params_sum)
    {
      int i;
      ipa_edge_args *args = ipa_edge_args_sum->get (edge);
      if (!args)
	return;
      class ipa_call_summary *es = ipa_call_summaries->get (edge);
      class ipa_call_summary *inlined_es
	= ipa_call_summaries->get (inlined_edge);

      /* Nothing to remap when no per-parameter info was recorded.  */
      if (es->param.length () == 0)
	return;

      for (i = 0; i < ipa_get_cs_argument_count (args); i++)
	{
	  struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
	  if (jfunc->type == IPA_JF_PASS_THROUGH
	      || jfunc->type == IPA_JF_ANCESTOR)
	    {
	      /* ID is the formal parameter of the inlined function that
		 argument I is forwarded from.  */
	      int id = jfunc->type == IPA_JF_PASS_THROUGH
		       ? ipa_get_jf_pass_through_formal_id (jfunc)
		       : ipa_get_jf_ancestor_formal_id (jfunc);
	      if (id < (int) inlined_es->param.length ())
		{
		  int prob1 = es->param[i].change_prob;
		  int prob2 = inlined_es->param[id].change_prob;
		  /* Combine the two change probabilities (see the comment
		     above: PROB1 * PROB2).  */
		  int prob = combine_probabilities (prob1, prob2);

		  /* Do not let a nonzero product round down to zero.  */
		  if (prob1 && prob2 && !prob)
		    prob = 1;

		  es->param[i].change_prob = prob;

		  /* Inherit the local-or-readonly-memory property from the
		     forwarded parameter of the inlined edge.  */
		  if (inlined_es
			->param[id].points_to_local_or_readonly_memory)
		    es->param[i].points_to_local_or_readonly_memory = true;
		}
	      /* A constant jump function whose value points to local or
		 readonly memory establishes the property directly.  */
	      if (!es->param[i].points_to_local_or_readonly_memory
		  && jfunc->type == IPA_JF_CONST
		  && points_to_local_or_readonly_memory_p
			 (ipa_get_jf_constant (jfunc)))
		es->param[i].points_to_local_or_readonly_memory = true;
	    }
	}
    }
}
4000
/* Update edge summaries of NODE after INLINED_EDGE has been inlined.

   Remap predicates of callees of NODE.  Rest of arguments match
   remap_predicate.

   Also update change probabilities.  */

static void
remap_edge_summaries (struct cgraph_edge *inlined_edge,
		      struct cgraph_node *node,
		      class ipa_fn_summary *info,
		      class ipa_node_params *params_summary,
		      class ipa_fn_summary *callee_info,
		      const vec<int> &operand_map,
		      const vec<HOST_WIDE_INT> &offset_map,
		      clause_t possible_truths,
		      ipa_predicate *toplev_predicate)
{
  struct cgraph_edge *e, *next;
  for (e = node->callees; e; e = next)
    {
      ipa_predicate p;
      /* Cache the successor up front in case edge_set_predicate affects
	 E's position in the list.  */
      next = e->next_callee;

      if (e->inline_failed)
	{
	  /* E is a real (non-inlined) call: remap its per-parameter
	     info and its predicate into the caller's context.  */
	  class ipa_call_summary *es = ipa_call_summaries->get (e);
	  remap_edge_params (inlined_edge, e);

	  if (es->predicate)
	    {
	      p = es->predicate->remap_after_inlining
				     (info, params_summary,
				      callee_info, operand_map,
				      offset_map, possible_truths,
				      *toplev_predicate);
	      edge_set_predicate (e, &p);
	    }
	  else
	    /* No edge-specific predicate; the call is guarded only by the
	       predicate of the inlined edge itself.  */
	    edge_set_predicate (e, toplev_predicate);
	}
      else
	/* E's callee was itself inlined; recurse into its calls.  */
	remap_edge_summaries (inlined_edge, e->callee, info,
			      params_summary, callee_info,
			      operand_map, offset_map, possible_truths,
			      toplev_predicate);
    }
  /* Indirect calls are handled like non-inlined direct calls.  */
  for (e = node->indirect_calls; e; e = next)
    {
      class ipa_call_summary *es = ipa_call_summaries->get (e);
      ipa_predicate p;
      next = e->next_callee;

      remap_edge_params (inlined_edge, e);
      if (es->predicate)
	{
	  p = es->predicate->remap_after_inlining
				 (info, params_summary,
				  callee_info, operand_map, offset_map,
				  possible_truths, *toplev_predicate);
	  edge_set_predicate (e, &p);
	}
      else
	edge_set_predicate (e, toplev_predicate);
    }
}
4067
/* Run remap_after_inlining on each predicate in V.  */

static void
remap_freqcounting_predicate (class ipa_fn_summary *info,
			      class ipa_node_params *params_summary,
			      class ipa_fn_summary *callee_info,
			      vec<ipa_freqcounting_predicate, va_gc> *v,
			      const vec<int> &operand_map,
			      const vec<HOST_WIDE_INT> &offset_map,
			      clause_t possible_truths,
			      ipa_predicate *toplev_predicate)

{
  ipa_freqcounting_predicate *fcp;
  for (int i = 0; vec_safe_iterate (v, i, &fcp); i++)
    {
      ipa_predicate p
	= fcp->predicate->remap_after_inlining (info, params_summary,
						callee_info, operand_map,
						offset_map, possible_truths,
						*toplev_predicate);
      /* Conjoin only non-trivial results; a remap to constant true or
	 false leaves the existing predicate unchanged.  */
      if (p != false && p != true)
	*fcp->predicate &= p;
    }
}
4093
/* We inlined EDGE.  Update summary of the function we inlined into.  */

void
ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge)
{
  ipa_fn_summary *callee_info = ipa_fn_summaries->get (edge->callee);
  /* TO is the outermost function the callee's body ends up in.  */
  struct cgraph_node *to = (edge->caller->inlined_to
			    ? edge->caller->inlined_to : edge->caller);
  class ipa_fn_summary *info = ipa_fn_summaries->get (to);
  clause_t clause = 0;	/* not_inline is known to be false.  */
  size_time_entry *e;
  auto_vec<int, 8> operand_map;
  auto_vec<HOST_WIDE_INT, 8> offset_map;
  int i;
  ipa_predicate toplev_predicate;
  class ipa_call_summary *es = ipa_call_summaries->get (edge);
  ipa_node_params *params_summary = (ipa_node_params_sum
				     ? ipa_node_params_sum->get (to) : NULL);

  /* Everything merged from the callee is additionally guarded by the
     predicate of the inlined edge itself (if any).  */
  if (es->predicate)
    toplev_predicate = *es->predicate;
  else
    toplev_predicate = true;

  info->fp_expressions |= callee_info->fp_expressions;

  if (callee_info->conds)
    {
      /* Evaluate which of the callee's conditions are satisfied on this
	 edge; the resulting CLAUSE is used when remapping predicates.  */
      ipa_auto_call_arg_values avals;
      evaluate_properties_for_edge (edge, true, &clause, NULL, &avals, false);
    }
  if (ipa_node_params_sum && callee_info->conds)
    {
      /* Build OPERAND_MAP/OFFSET_MAP translating the callee's formal
	 parameter indices to the caller's, based on the jump functions
	 of the inlined edge.  -1 means "no mapping".  */
      ipa_edge_args *args = ipa_edge_args_sum->get (edge);
      int count = args ? ipa_get_cs_argument_count (args) : 0;
      int i;

      if (count)
	{
	  operand_map.safe_grow_cleared (count, true);
	  offset_map.safe_grow_cleared (count, true);
	}
      for (i = 0; i < count; i++)
	{
	  struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
	  int map = -1;

	  /* TODO: handle non-NOPs when merging.  */
	  if (jfunc->type == IPA_JF_PASS_THROUGH)
	    {
	      if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
		map = ipa_get_jf_pass_through_formal_id (jfunc);
	      if (!ipa_get_jf_pass_through_agg_preserved (jfunc))
		offset_map[i] = -1;
	    }
	  else if (jfunc->type == IPA_JF_ANCESTOR)
	    {
	      HOST_WIDE_INT offset = ipa_get_jf_ancestor_offset (jfunc);
	      if (offset >= 0 && offset < INT_MAX)
		{
		  map = ipa_get_jf_ancestor_formal_id (jfunc);
		  if (!ipa_get_jf_ancestor_agg_preserved (jfunc))
		    offset = -1;
		  offset_map[i] = offset;
		}
	    }
	  operand_map[i] = map;
	  gcc_assert (map < ipa_get_param_count (params_summary));
	}

      /* Propagate the callee's __builtin_constant_p parameters that map
	 to caller parameters.  */
      int ip;
      for (i = 0; callee_info->builtin_constant_p_parms.iterate (i, &ip); i++)
	if (ip < count && operand_map[ip] >= 0)
	  add_builtin_constant_p_parm (info, operand_map[ip]);
    }
  sreal freq = edge->sreal_frequency ();
  /* Merge the callee's size/time table into the caller, remapping each
     entry's predicates and scaling time by the edge frequency.  */
  for (i = 0; callee_info->size_time_table.iterate (i, &e); i++)
    {
      ipa_predicate p;
      p = e->exec_predicate.remap_after_inlining
			     (info, params_summary,
			      callee_info, operand_map,
			      offset_map, clause,
			      toplev_predicate);
      ipa_predicate nonconstp;
      nonconstp = e->nonconst_predicate.remap_after_inlining
			     (info, params_summary,
			      callee_info, operand_map,
			      offset_map, clause,
			      toplev_predicate);
      if (p != false && nonconstp != false)
	{
	  sreal add_time = ((sreal)e->time * freq);
	  /* Scale down time by the probability that the entry's
	     non-constant predicate actually holds on this edge.  */
	  int prob = e->nonconst_predicate.probability (callee_info->conds,
							clause, es->param);
	  if (prob != REG_BR_PROB_BASE)
	    add_time = add_time * prob / REG_BR_PROB_BASE;
	  if (prob != REG_BR_PROB_BASE
	      && dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "\t\tScaling time by probability:%f\n",
		       (double) prob / REG_BR_PROB_BASE);
	    }
	  info->account_size_time (e->size, add_time, p, nonconstp);
	}
    }
  /* Remap edge predicates of everything now living inside TO.  */
  remap_edge_summaries (edge, edge->callee, info, params_summary,
			callee_info, operand_map,
			offset_map, clause, &toplev_predicate);
  remap_freqcounting_predicate (info, params_summary, callee_info,
				info->loop_iterations, operand_map,
				offset_map, clause, &toplev_predicate);
  remap_freqcounting_predicate (info, params_summary, callee_info,
				info->loop_strides, operand_map,
				offset_map, clause, &toplev_predicate);

  /* Update peak stack usage: the callee's frame now sits on top of the
     frames of all functions it was inlined through.  */
  HOST_WIDE_INT stack_frame_offset = ipa_get_stack_frame_offset (edge->callee);
  HOST_WIDE_INT peak = stack_frame_offset + callee_info->estimated_stack_size;

  if (info->estimated_stack_size < peak)
    info->estimated_stack_size = peak;

  inline_update_callee_summaries (edge->callee, es->loop_depth);
  if (info->call_size_time_table.length ())
    {
      int edge_size = 0;
      sreal edge_time = 0;

      estimate_edge_size_and_time (edge, &edge_size, NULL, &edge_time, NULL, 0);
      /* Unaccount size and time of the optimized out call.  */
      info->account_size_time (-edge_size, -edge_time,
			       es->predicate ? *es->predicate : true,
			       es->predicate ? *es->predicate : true,
			       true);
      /* Account new calls.  */
      summarize_calls_size_and_time (edge->callee, info);
    }

  /* Free summaries that are not maintained for inline clones/edges.  */
  ipa_call_summaries->remove (edge);
  ipa_fn_summaries->remove (edge->callee);
  ipa_remove_from_growth_caches (edge);
}
4237
/* For performance reasons ipa_merge_fn_summary_after_inlining is not updating
   overall size and time.  Recompute it.
   If RESET is true also recompute call_size_time_table.  */

void
ipa_update_overall_fn_summary (struct cgraph_node *node, bool reset)
{
  class ipa_fn_summary *info = ipa_fn_summaries->get (node);
  class ipa_size_summary *size_info = ipa_size_summaries->get (node);
  size_time_entry *e;
  int i;

  /* Sum size and time over all size/time table entries.  */
  size_info->size = 0;
  info->time = 0;
  for (i = 0; info->size_time_table.iterate (i, &e); i++)
    {
      size_info->size += e->size;
      info->time += e->time;
    }
  /* NOTE(review): entry 0 appears to be the always-executed part of the
     body and is used as the minimal size — confirm against
     account_size_time's invariants.  */
  info->min_size = info->size_time_table[0].size;
  if (reset)
    info->call_size_time_table.release ();
  if (node->callees || node->indirect_calls)
    estimate_calls_size_and_time (node, &size_info->size, &info->min_size,
				  &info->time, NULL,
				  ~(clause_t) (1 << ipa_predicate::false_condition),
				  NULL);
  /* Sizes are accumulated scaled by size_scale; convert back.  */
  size_info->size = RDIV (size_info->size, ipa_fn_summary::size_scale);
  info->min_size = RDIV (info->min_size, ipa_fn_summary::size_scale);
}
4268
4269
4270 /* This function performs intraprocedural analysis in NODE that is required to
4271 inline indirect calls. */
4272
4273 static void
inline_indirect_intraprocedural_analysis(struct cgraph_node * node)4274 inline_indirect_intraprocedural_analysis (struct cgraph_node *node)
4275 {
4276 ipa_analyze_node (node);
4277 if (dump_file && (dump_flags & TDF_DETAILS))
4278 {
4279 ipa_print_node_params (dump_file, node);
4280 ipa_print_node_jump_functions (dump_file, node);
4281 }
4282 }
4283
4284
4285 /* Note function body size. */
4286
4287 void
inline_analyze_function(struct cgraph_node * node)4288 inline_analyze_function (struct cgraph_node *node)
4289 {
4290 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
4291
4292 if (dump_file)
4293 fprintf (dump_file, "\nAnalyzing function: %s\n", node->dump_name ());
4294 if (opt_for_fn (node->decl, optimize) && !node->thunk)
4295 inline_indirect_intraprocedural_analysis (node);
4296 compute_fn_summary (node, false);
4297 if (!optimize)
4298 {
4299 struct cgraph_edge *e;
4300 for (e = node->callees; e; e = e->next_callee)
4301 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
4302 for (e = node->indirect_calls; e; e = e->next_callee)
4303 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
4304 }
4305
4306 pop_cfun ();
4307 }
4308
4309
/* Called when new function is inserted to callgraph late.
   Recompute its summary from scratch.  */

void
ipa_fn_summary_t::insert (struct cgraph_node *node, ipa_fn_summary *)
{
  inline_analyze_function (node);
}
4317
4318 /* Note function body size. */
4319
4320 static void
ipa_fn_summary_generate(void)4321 ipa_fn_summary_generate (void)
4322 {
4323 struct cgraph_node *node;
4324
4325 FOR_EACH_DEFINED_FUNCTION (node)
4326 if (DECL_STRUCT_FUNCTION (node->decl))
4327 node->versionable = tree_versionable_function_p (node->decl);
4328
4329 ipa_fn_summary_alloc ();
4330
4331 ipa_fn_summaries->enable_insertion_hook ();
4332
4333 ipa_register_cgraph_hooks ();
4334
4335 FOR_EACH_DEFINED_FUNCTION (node)
4336 if (!node->alias
4337 && (flag_generate_lto || flag_generate_offload|| flag_wpa
4338 || opt_for_fn (node->decl, optimize)))
4339 inline_analyze_function (node);
4340 }
4341
4342
/* Read inline summary for edge E from IB.  If PREVAILS is false, the
   edge belongs to a non-prevailing node: consume the stream data but
   discard it.  The read order must mirror write_ipa_call_summary.  */

static void
read_ipa_call_summary (class lto_input_block *ib, struct cgraph_edge *e,
		       bool prevails)
{
  class ipa_call_summary *es = prevails
    ? ipa_call_summaries->get_create (e) : NULL;
  ipa_predicate p;
  int length, i;

  int size = streamer_read_uhwi (ib);
  int time = streamer_read_uhwi (ib);
  int depth = streamer_read_uhwi (ib);

  if (es)
    {
      es->call_stmt_size = size;
      es->call_stmt_time = time;
      es->loop_depth = depth;
    }

  bitpack_d bp = streamer_read_bitpack (ib);
  if (es)
    es->is_return_callee_uncaptured = bp_unpack_value (&bp, 1);
  else
    /* Still consume the bit to stay in sync with the stream.  */
    bp_unpack_value (&bp, 1);

  p.stream_in (ib);
  if (es)
    edge_set_predicate (e, &p);
  length = streamer_read_uhwi (ib);
  if (length && es
      && (e->possibly_call_in_translation_unit_p ()
	  /* Also stream in jump functions to builtins in hope that they
	     will get fnspecs.  */
	  || fndecl_built_in_p (e->callee->decl, BUILT_IN_NORMAL)))
    {
      es->param.safe_grow_cleared (length, true);
      for (i = 0; i < length; i++)
	{
	  es->param[i].change_prob = streamer_read_uhwi (ib);
	  es->param[i].points_to_local_or_readonly_memory
	    = streamer_read_uhwi (ib);
	}
    }
  else
    {
      /* Skip per-parameter data we will not use.  */
      for (i = 0; i < length; i++)
	{
	  streamer_read_uhwi (ib);
	  streamer_read_uhwi (ib);
	}
    }
}
4398
4399
/* Stream in inline summaries from the section.  DATA/LEN is the raw
   section contents of FILE_DATA.  The read order must mirror
   ipa_fn_summary_write; data for non-prevailing nodes is consumed but
   discarded.  */

static void
inline_read_section (struct lto_file_decl_data *file_data, const char *data,
		     size_t len)
{
  const struct lto_function_header *header =
    (const struct lto_function_header *) data;
  const int cfg_offset = sizeof (struct lto_function_header);
  const int main_offset = cfg_offset + header->cfg_size;
  const int string_offset = main_offset + header->main_size;
  class data_in *data_in;
  unsigned int i, count2, j;
  unsigned int f_count;

  lto_input_block ib ((const char *) data + main_offset, header->main_size,
		      file_data->mode_table);

  data_in =
    lto_data_in_create (file_data, (const char *) data + string_offset,
			header->string_size, vNULL);
  f_count = streamer_read_uhwi (&ib);
  for (i = 0; i < f_count; i++)
    {
      unsigned int index;
      struct cgraph_node *node;
      class ipa_fn_summary *info;
      class ipa_node_params *params_summary;
      class ipa_size_summary *size_info;
      lto_symtab_encoder_t encoder;
      struct bitpack_d bp;
      struct cgraph_edge *e;
      ipa_predicate p;

      index = streamer_read_uhwi (&ib);
      encoder = file_data->symtab_node_encoder;
      node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
								index));
      /* Only attach summaries to the prevailing copy of the node.  */
      info = node->prevailing_p () ? ipa_fn_summaries->get_create (node) : NULL;
      params_summary = node->prevailing_p ()
		       ? ipa_node_params_sum->get (node) : NULL;
      size_info = node->prevailing_p ()
		  ? ipa_size_summaries->get_create (node) : NULL;

      int stack_size = streamer_read_uhwi (&ib);
      int size = streamer_read_uhwi (&ib);
      sreal time = sreal::stream_in (&ib);

      if (info)
	{
	  info->estimated_stack_size
	    = size_info->estimated_self_stack_size = stack_size;
	  size_info->size = size_info->self_size = size;
	  info->time = time;
	}

      bp = streamer_read_bitpack (&ib);
      if (info)
	{
	  info->inlinable = bp_unpack_value (&bp, 1);
	  info->fp_expressions = bp_unpack_value (&bp, 1);
	}
      else
	{
	  /* Consume the bits to keep the stream in sync.  */
	  bp_unpack_value (&bp, 1);
	  bp_unpack_value (&bp, 1);
	}

      /* Conditions referenced by size/time predicates.  */
      count2 = streamer_read_uhwi (&ib);
      gcc_assert (!info || !info->conds);
      if (info)
	vec_safe_reserve_exact (info->conds, count2);
      for (j = 0; j < count2; j++)
	{
	  struct condition c;
	  unsigned int k, count3;
	  c.operand_num = streamer_read_uhwi (&ib);
	  c.code = (enum tree_code) streamer_read_uhwi (&ib);
	  c.type = stream_read_tree (&ib, data_in);
	  c.val = stream_read_tree (&ib, data_in);
	  bp = streamer_read_bitpack (&ib);
	  c.agg_contents = bp_unpack_value (&bp, 1);
	  c.by_ref = bp_unpack_value (&bp, 1);
	  if (c.agg_contents)
	    c.offset = streamer_read_uhwi (&ib);
	  count3 = streamer_read_uhwi (&ib);
	  c.param_ops = NULL;
	  if (info)
	    vec_safe_reserve_exact (c.param_ops, count3);
	  if (params_summary)
	    ipa_set_param_used_by_ipa_predicates
		    (params_summary, c.operand_num, true);
	  /* Extra operations applied to the parameter before the
	     comparison in C is evaluated.  */
	  for (k = 0; k < count3; k++)
	    {
	      struct expr_eval_op op;
	      enum gimple_rhs_class rhs_class;
	      op.code = (enum tree_code) streamer_read_uhwi (&ib);
	      op.type = stream_read_tree (&ib, data_in);
	      switch (rhs_class = get_gimple_rhs_class (op.code))
		{
		case GIMPLE_UNARY_RHS:
		  op.index = 0;
		  op.val[0] = NULL_TREE;
		  op.val[1] = NULL_TREE;
		  break;

		case GIMPLE_BINARY_RHS:
		case GIMPLE_TERNARY_RHS:
		  bp = streamer_read_bitpack (&ib);
		  op.index = bp_unpack_value (&bp, 2);
		  op.val[0] = stream_read_tree (&ib, data_in);
		  if (rhs_class == GIMPLE_BINARY_RHS)
		    op.val[1] = NULL_TREE;
		  else
		    op.val[1] = stream_read_tree (&ib, data_in);
		  break;

		default:
		  fatal_error (UNKNOWN_LOCATION,
			       "invalid fnsummary in LTO stream");
		}
	      if (info)
		c.param_ops->quick_push (op);
	    }
	  if (info)
	    info->conds->quick_push (c);
	}
      /* Size/time table.  */
      count2 = streamer_read_uhwi (&ib);
      gcc_assert (!info || !info->size_time_table.length ());
      if (info && count2)
	info->size_time_table.reserve_exact (count2);
      for (j = 0; j < count2; j++)
	{
	  class size_time_entry e;

	  e.size = streamer_read_uhwi (&ib);
	  e.time = sreal::stream_in (&ib);
	  e.exec_predicate.stream_in (&ib);
	  e.nonconst_predicate.stream_in (&ib);

	  if (info)
	    info->size_time_table.quick_push (e);
	}

      /* Loop iteration predicates.  */
      count2 = streamer_read_uhwi (&ib);
      for (j = 0; j < count2; j++)
	{
	  p.stream_in (&ib);
	  sreal fcp_freq = sreal::stream_in (&ib);
	  if (info)
	    {
	      ipa_freqcounting_predicate fcp;
	      fcp.predicate = NULL;
	      set_hint_predicate (&fcp.predicate, p);
	      fcp.freq = fcp_freq;
	      vec_safe_push (info->loop_iterations, fcp);
	    }
	}
      /* Loop stride predicates.  */
      count2 = streamer_read_uhwi (&ib);
      for (j = 0; j < count2; j++)
	{
	  p.stream_in (&ib);
	  sreal fcp_freq = sreal::stream_in (&ib);
	  if (info)
	    {
	      ipa_freqcounting_predicate fcp;
	      fcp.predicate = NULL;
	      set_hint_predicate (&fcp.predicate, p);
	      fcp.freq = fcp_freq;
	      vec_safe_push (info->loop_strides, fcp);
	    }
	}
      /* Parameters tested by __builtin_constant_p.  */
      count2 = streamer_read_uhwi (&ib);
      if (info && count2)
	info->builtin_constant_p_parms.reserve_exact (count2);
      for (j = 0; j < count2; j++)
	{
	  int parm = streamer_read_uhwi (&ib);
	  if (info)
	    info->builtin_constant_p_parms.quick_push (parm);
	}
      /* Finally, the per-edge call summaries.  */
      for (e = node->callees; e; e = e->next_callee)
	read_ipa_call_summary (&ib, e, info != NULL);
      for (e = node->indirect_calls; e; e = e->next_callee)
	read_ipa_call_summary (&ib, e, info != NULL);
    }

  lto_free_section_data (file_data, LTO_section_ipa_fn_summary, NULL, data,
			 len);
  lto_data_in_delete (data_in);
}
4591
4592
4593 /* Read inline summary. Jump functions are shared among ipa-cp
4594 and inliner, so when ipa-cp is active, we don't need to write them
4595 twice. */
4596
4597 static void
ipa_fn_summary_read(void)4598 ipa_fn_summary_read (void)
4599 {
4600 struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
4601 struct lto_file_decl_data *file_data;
4602 unsigned int j = 0;
4603
4604 ipa_prop_read_jump_functions ();
4605 ipa_fn_summary_alloc ();
4606
4607 while ((file_data = file_data_vec[j++]))
4608 {
4609 size_t len;
4610 const char *data
4611 = lto_get_summary_section_data (file_data, LTO_section_ipa_fn_summary,
4612 &len);
4613 if (data)
4614 inline_read_section (file_data, data, len);
4615 else
4616 /* Fatal error here. We do not want to support compiling ltrans units
4617 with different version of compiler or different flags than the WPA
4618 unit, so this should never happen. */
4619 fatal_error (input_location,
4620 "ipa inline summary is missing in input file");
4621 }
4622 ipa_register_cgraph_hooks ();
4623
4624 gcc_assert (ipa_fn_summaries);
4625 ipa_fn_summaries->enable_insertion_hook ();
4626 }
4627
4628
/* Write inline summary for edge E to OB.  The write order must mirror
   read_ipa_call_summary.  */

static void
write_ipa_call_summary (struct output_block *ob, struct cgraph_edge *e)
{
  class ipa_call_summary *es = ipa_call_summaries->get (e);
  int i;

  streamer_write_uhwi (ob, es->call_stmt_size);
  streamer_write_uhwi (ob, es->call_stmt_time);
  streamer_write_uhwi (ob, es->loop_depth);

  bitpack_d bp = bitpack_create (ob->main_stream);
  bp_pack_value (&bp, es->is_return_callee_uncaptured, 1);
  streamer_write_bitpack (&bp);

  if (es->predicate)
    es->predicate->stream_out (ob);
  else
    /* 0 encodes "no predicate" on the reader side.  */
    streamer_write_uhwi (ob, 0);
  /* Per-parameter change probability and memory-locality info.  */
  streamer_write_uhwi (ob, es->param.length ());
  for (i = 0; i < (int) es->param.length (); i++)
    {
      streamer_write_uhwi (ob, es->param[i].change_prob);
      streamer_write_uhwi (ob, es->param[i].points_to_local_or_readonly_memory);
    }
}
4656
4657
/* Write inline summary for node in SET.
   Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
   active, we don't need to write them twice.
   The write order must mirror inline_read_section.  */

static void
ipa_fn_summary_write (void)
{
  struct output_block *ob = create_output_block (LTO_section_ipa_fn_summary);
  lto_symtab_encoder_iterator lsei;
  lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
  unsigned int count = 0;

  /* First pass: count the nodes we will stream so the reader knows how
     many records to expect.  */
  for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
       lsei_next_function_in_partition (&lsei))
    {
      cgraph_node *cnode = lsei_cgraph_node (lsei);
      if (cnode->definition && !cnode->alias)
	count++;
    }
  streamer_write_uhwi (ob, count);

  for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
       lsei_next_function_in_partition (&lsei))
    {
      cgraph_node *cnode = lsei_cgraph_node (lsei);
      if (cnode->definition && !cnode->alias)
	{
	  class ipa_fn_summary *info = ipa_fn_summaries->get (cnode);
	  class ipa_size_summary *size_info = ipa_size_summaries->get (cnode);
	  struct bitpack_d bp;
	  struct cgraph_edge *edge;
	  int i;
	  size_time_entry *e;
	  struct condition *c;

	  streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode));
	  streamer_write_hwi (ob, size_info->estimated_self_stack_size);
	  streamer_write_hwi (ob, size_info->self_size);
	  info->time.stream_out (ob);
	  bp = bitpack_create (ob->main_stream);
	  bp_pack_value (&bp, info->inlinable, 1);
	  bp_pack_value (&bp, info->fp_expressions, 1);
	  streamer_write_bitpack (&bp);
	  /* Conditions referenced by the predicates below.  */
	  streamer_write_uhwi (ob, vec_safe_length (info->conds));
	  for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
	    {
	      int j;
	      struct expr_eval_op *op;

	      streamer_write_uhwi (ob, c->operand_num);
	      streamer_write_uhwi (ob, c->code);
	      stream_write_tree (ob, c->type, true);
	      stream_write_tree (ob, c->val, true);
	      bp = bitpack_create (ob->main_stream);
	      bp_pack_value (&bp, c->agg_contents, 1);
	      bp_pack_value (&bp, c->by_ref, 1);
	      streamer_write_bitpack (&bp);
	      if (c->agg_contents)
		streamer_write_uhwi (ob, c->offset);
	      /* Extra operations applied to the parameter value.  */
	      streamer_write_uhwi (ob, vec_safe_length (c->param_ops));
	      for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
		{
		  streamer_write_uhwi (ob, op->code);
		  stream_write_tree (ob, op->type, true);
		  if (op->val[0])
		    {
		      bp = bitpack_create (ob->main_stream);
		      bp_pack_value (&bp, op->index, 2);
		      streamer_write_bitpack (&bp);
		      stream_write_tree (ob, op->val[0], true);
		      if (op->val[1])
			stream_write_tree (ob, op->val[1], true);
		    }
		}
	    }
	  /* Size/time table.  */
	  streamer_write_uhwi (ob, info->size_time_table.length ());
	  for (i = 0; info->size_time_table.iterate (i, &e); i++)
	    {
	      streamer_write_uhwi (ob, e->size);
	      e->time.stream_out (ob);
	      e->exec_predicate.stream_out (ob);
	      e->nonconst_predicate.stream_out (ob);
	    }
	  /* Loop iteration and stride predicates.  */
	  ipa_freqcounting_predicate *fcp;
	  streamer_write_uhwi (ob, vec_safe_length (info->loop_iterations));
	  for (i = 0; vec_safe_iterate (info->loop_iterations, i, &fcp); i++)
	    {
	      fcp->predicate->stream_out (ob);
	      fcp->freq.stream_out (ob);
	    }
	  streamer_write_uhwi (ob, vec_safe_length (info->loop_strides));
	  for (i = 0; vec_safe_iterate (info->loop_strides, i, &fcp); i++)
	    {
	      fcp->predicate->stream_out (ob);
	      fcp->freq.stream_out (ob);
	    }
	  /* Parameters tested by __builtin_constant_p.  */
	  streamer_write_uhwi (ob, info->builtin_constant_p_parms.length ());
	  int ip;
	  for (i = 0; info->builtin_constant_p_parms.iterate (i, &ip);
	       i++)
	    streamer_write_uhwi (ob, ip);
	  /* Finally, the per-edge call summaries.  */
	  for (edge = cnode->callees; edge; edge = edge->next_callee)
	    write_ipa_call_summary (ob, edge);
	  for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
	    write_ipa_call_summary (ob, edge);
	}
    }
  streamer_write_char_stream (ob->main_stream, 0);
  produce_asm (ob, NULL);
  destroy_output_block (ob);

  ipa_prop_write_jump_functions ();
}
4771
4772
/* Release function summary.  No-op when the summaries were already
   released (ipa_call_summaries acts as the guard).  */

void
ipa_free_fn_summary (void)
{
  if (!ipa_call_summaries)
    return;
  ggc_delete (ipa_fn_summaries);
  ipa_fn_summaries = NULL;
  delete ipa_call_summaries;
  ipa_call_summaries = NULL;
  edge_predicate_pool.release ();
  /* During IPA this is one of largest datastructures to release.  */
  if (flag_wpa)
    ggc_trim ();
}
4789
/* Release size summary.  No-op when it was already released.  */

void
ipa_free_size_summary (void)
{
  if (!ipa_size_summaries)
    return;
  delete ipa_size_summaries;
  ipa_size_summaries = NULL;
}
4800
namespace {

/* Pass descriptor for the GIMPLE pass computing the local (per-function)
   part of the fn summary.  */

const pass_data pass_data_local_fn_summary =
{
  GIMPLE_PASS, /* type */
  "local-fnsummary", /* name */
  OPTGROUP_INLINE, /* optinfo_flags */
  TV_INLINE_PARAMETERS, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

/* GIMPLE pass wrapper around compute_fn_summary_for_current.  */

class pass_local_fn_summary : public gimple_opt_pass
{
public:
  pass_local_fn_summary (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_local_fn_summary, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_local_fn_summary (m_ctxt); }
  virtual unsigned int execute (function *)
    {
      return compute_fn_summary_for_current ();
    }

}; // class pass_local_fn_summary

} // anon namespace
4833
/* Create an instance of the local fn summary pass.  */

gimple_opt_pass *
make_pass_local_fn_summary (gcc::context *ctxt)
{
  return new pass_local_fn_summary (ctxt);
}
4839
4840
/* Free inline summary.  */

namespace {

/* Pass descriptor for the simple IPA pass releasing fn summaries once
   they are no longer needed.  */

const pass_data pass_data_ipa_free_fn_summary =
{
  SIMPLE_IPA_PASS, /* type */
  "free-fnsummary", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_IPA_FREE_INLINE_SUMMARY, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_ipa_free_fn_summary : public simple_ipa_opt_pass
{
public:
  pass_ipa_free_fn_summary (gcc::context *ctxt)
    : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary, ctxt),
      small_p (false)
  {}

  /* opt_pass methods: */
  opt_pass *clone () { return new pass_ipa_free_fn_summary (m_ctxt); }
  /* Accept the single boolean pass parameter.  */
  void set_pass_param (unsigned int n, bool param)
    {
      gcc_assert (n == 0);
      small_p = param;
    }
  virtual bool gate (function *) { return true; }
  virtual unsigned int execute (function *)
    {
      ipa_free_fn_summary ();
      /* Free ipa-prop structures if they are no longer needed.  */
      ipa_free_all_structures_after_iinln ();
      /* Size summaries are still needed for WPA streaming.  */
      if (!flag_wpa)
	ipa_free_size_summary ();
      return 0;
    }

private:
  /* Pass parameter; stored via set_pass_param but not otherwise used in
     the code visible here.  */
  bool small_p;
}; // class pass_ipa_free_fn_summary

} // anon namespace
4889
/* Create an instance of the free fn summary pass.  */

simple_ipa_opt_pass *
make_pass_ipa_free_fn_summary (gcc::context *ctxt)
{
  return new pass_ipa_free_fn_summary (ctxt);
}
4895
namespace {

/* Pass descriptor for the IPA fn summary pass, which only generates,
   streams, and reads summaries — no transformation is performed.  */

const pass_data pass_data_ipa_fn_summary =
{
  IPA_PASS, /* type */
  "fnsummary", /* name */
  OPTGROUP_INLINE, /* optinfo_flags */
  TV_IPA_FNSUMMARY, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_dump_symtab ), /* todo_flags_finish */
};

class pass_ipa_fn_summary : public ipa_opt_pass_d
{
public:
  pass_ipa_fn_summary (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_fn_summary, ctxt,
		      ipa_fn_summary_generate, /* generate_summary */
		      ipa_fn_summary_write, /* write_summary */
		      ipa_fn_summary_read, /* read_summary */
		      NULL, /* write_optimization_summary */
		      NULL, /* read_optimization_summary */
		      NULL, /* stmt_fixup */
		      0, /* function_transform_todo_flags_start */
		      NULL, /* function_transform */
		      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  /* All the work happens in the summary hooks above.  */
  virtual unsigned int execute (function *) { return 0; }

}; // class pass_ipa_fn_summary

} // anon namespace
4933
/* Create an instance of the IPA fn summary pass.  */

ipa_opt_pass_d *
make_pass_ipa_fn_summary (gcc::context *ctxt)
{
  return new pass_ipa_fn_summary (ctxt);
}
4939
/* Reset all state within ipa-fnsummary.c so that we can rerun the compiler
   within the same process.  For use by toplev::finalize.  */

void
ipa_fnsummary_c_finalize (void)
{
  ipa_free_fn_summary ();
}
4948