1 /* Function summary pass.
2 Copyright (C) 2003-2020 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* Analysis of function bodies used by inter-procedural passes
22
23 We estimate for each function
24 - function body size and size after specializing into given context
25 - average function execution time in a given context
26 - function frame size
27 For each call
28 - call statement size, time and how often the parameters change
29
30 ipa_fn_summary data structures store above information locally (i.e.
31 parameters of the function itself) and globally (i.e. parameters of
32 the function created by applying all the inline decisions already
33 present in the callgraph).
34
35 We provide access to the ipa_fn_summary data structure and
36 basic logic updating the parameters when inlining is performed.
37
38 The summaries are context sensitive. Context means
39 1) partial assignment of known constant values of operands
40 2) whether function is inlined into the call or not.
41 It is easy to add more variants. To represent function size and time
42 that depends on context (i.e. it is known to be optimized away when
43 context is known either by inlining or from IP-CP and cloning),
44 we use predicates.
45
46 estimate_edge_size_and_time can be used to query
47 function size/time in the given context. ipa_merge_fn_summary_after_inlining merges
48 properties of caller and callee after inlining.
49
50 Finally pass_inline_parameters is exported. This is used to drive
51 computation of function parameters used by the early inliner. IPA
52 inlined performs analysis via its analyze_function method. */
53
54 #include "config.h"
55 #include "system.h"
56 #include "coretypes.h"
57 #include "backend.h"
58 #include "tree.h"
59 #include "gimple.h"
60 #include "alloc-pool.h"
61 #include "tree-pass.h"
62 #include "ssa.h"
63 #include "tree-streamer.h"
64 #include "cgraph.h"
65 #include "diagnostic.h"
66 #include "fold-const.h"
67 #include "print-tree.h"
68 #include "tree-inline.h"
69 #include "gimple-pretty-print.h"
70 #include "cfganal.h"
71 #include "gimple-iterator.h"
72 #include "tree-cfg.h"
73 #include "tree-ssa-loop-niter.h"
74 #include "tree-ssa-loop.h"
75 #include "symbol-summary.h"
76 #include "ipa-prop.h"
77 #include "ipa-fnsummary.h"
78 #include "cfgloop.h"
79 #include "tree-scalar-evolution.h"
80 #include "ipa-utils.h"
81 #include "cfgexpand.h"
82 #include "gimplify.h"
83 #include "stringpool.h"
84 #include "attribs.h"
85 #include "tree-into-ssa.h"
86
87 /* Summaries. */
88 fast_function_summary <ipa_fn_summary *, va_gc> *ipa_fn_summaries;
89 fast_function_summary <ipa_size_summary *, va_heap> *ipa_size_summaries;
90 fast_call_summary <ipa_call_summary *, va_heap> *ipa_call_summaries;
91
92 /* Edge predicates goes here. */
93 static object_allocator<predicate> edge_predicate_pool ("edge predicates");
94
95
96 /* Dump IPA hints. */
97 void
ipa_dump_hints(FILE * f,ipa_hints hints)98 ipa_dump_hints (FILE *f, ipa_hints hints)
99 {
100 if (!hints)
101 return;
102 fprintf (f, "IPA hints:");
103 if (hints & INLINE_HINT_indirect_call)
104 {
105 hints &= ~INLINE_HINT_indirect_call;
106 fprintf (f, " indirect_call");
107 }
108 if (hints & INLINE_HINT_loop_iterations)
109 {
110 hints &= ~INLINE_HINT_loop_iterations;
111 fprintf (f, " loop_iterations");
112 }
113 if (hints & INLINE_HINT_loop_stride)
114 {
115 hints &= ~INLINE_HINT_loop_stride;
116 fprintf (f, " loop_stride");
117 }
118 if (hints & INLINE_HINT_same_scc)
119 {
120 hints &= ~INLINE_HINT_same_scc;
121 fprintf (f, " same_scc");
122 }
123 if (hints & INLINE_HINT_in_scc)
124 {
125 hints &= ~INLINE_HINT_in_scc;
126 fprintf (f, " in_scc");
127 }
128 if (hints & INLINE_HINT_cross_module)
129 {
130 hints &= ~INLINE_HINT_cross_module;
131 fprintf (f, " cross_module");
132 }
133 if (hints & INLINE_HINT_declared_inline)
134 {
135 hints &= ~INLINE_HINT_declared_inline;
136 fprintf (f, " declared_inline");
137 }
138 if (hints & INLINE_HINT_known_hot)
139 {
140 hints &= ~INLINE_HINT_known_hot;
141 fprintf (f, " known_hot");
142 }
143 gcc_assert (!hints);
144 }
145
146
147 /* Record SIZE and TIME to SUMMARY.
148 The accounted code will be executed when EXEC_PRED is true.
149 When NONCONST_PRED is false the code will evaluate to constant and
150 will get optimized out in specialized clones of the function.
151 If CALL is true account to call_size_time_table rather than
152 size_time_table. */
153
154 void
account_size_time(int size,sreal time,const predicate & exec_pred,const predicate & nonconst_pred_in,bool call)155 ipa_fn_summary::account_size_time (int size, sreal time,
156 const predicate &exec_pred,
157 const predicate &nonconst_pred_in,
158 bool call)
159 {
160 size_time_entry *e;
161 bool found = false;
162 int i;
163 predicate nonconst_pred;
164 vec<size_time_entry, va_gc> *table = call
165 ? call_size_time_table : size_time_table;
166
167 if (exec_pred == false)
168 return;
169
170 nonconst_pred = nonconst_pred_in & exec_pred;
171
172 if (nonconst_pred == false)
173 return;
174
175 /* We need to create initial empty unconditional clause, but otherwise
176 we don't need to account empty times and sizes. */
177 if (!size && time == 0 && table)
178 return;
179
180 /* Only for calls we are unaccounting what we previously recorded. */
181 gcc_checking_assert (time >= 0 || call);
182
183 for (i = 0; vec_safe_iterate (table, i, &e); i++)
184 if (e->exec_predicate == exec_pred
185 && e->nonconst_predicate == nonconst_pred)
186 {
187 found = true;
188 break;
189 }
190 if (i == max_size_time_table_size)
191 {
192 i = 0;
193 found = true;
194 e = &(*table)[0];
195 if (dump_file && (dump_flags & TDF_DETAILS))
196 fprintf (dump_file,
197 "\t\tReached limit on number of entries, "
198 "ignoring the predicate.");
199 }
200 if (dump_file && (dump_flags & TDF_DETAILS) && (time != 0 || size))
201 {
202 fprintf (dump_file,
203 "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
204 ((double) size) / ipa_fn_summary::size_scale,
205 (time.to_double ()), found ? "" : "new ");
206 exec_pred.dump (dump_file, conds, 0);
207 if (exec_pred != nonconst_pred)
208 {
209 fprintf (dump_file, " nonconst:");
210 nonconst_pred.dump (dump_file, conds);
211 }
212 else
213 fprintf (dump_file, "\n");
214 }
215 if (!found)
216 {
217 class size_time_entry new_entry;
218 new_entry.size = size;
219 new_entry.time = time;
220 new_entry.exec_predicate = exec_pred;
221 new_entry.nonconst_predicate = nonconst_pred;
222 if (call)
223 vec_safe_push (call_size_time_table, new_entry);
224 else
225 vec_safe_push (size_time_table, new_entry);
226 }
227 else
228 {
229 e->size += size;
230 e->time += time;
231 /* FIXME: PR bootstrap/92653 gcc_checking_assert (e->time >= -1); */
232 /* Tolerate small roundoff issues. */
233 if (e->time < 0)
234 e->time = 0;
235 }
236 }
237
238 /* We proved E to be unreachable, redirect it to __builtin_unreachable. */
239
240 static struct cgraph_edge *
redirect_to_unreachable(struct cgraph_edge * e)241 redirect_to_unreachable (struct cgraph_edge *e)
242 {
243 struct cgraph_node *callee = !e->inline_failed ? e->callee : NULL;
244 struct cgraph_node *target = cgraph_node::get_create
245 (builtin_decl_implicit (BUILT_IN_UNREACHABLE));
246
247 if (e->speculative)
248 e = cgraph_edge::resolve_speculation (e, target->decl);
249 else if (!e->callee)
250 e = cgraph_edge::make_direct (e, target);
251 else
252 e->redirect_callee (target);
253 class ipa_call_summary *es = ipa_call_summaries->get (e);
254 e->inline_failed = CIF_UNREACHABLE;
255 e->count = profile_count::zero ();
256 es->call_stmt_size = 0;
257 es->call_stmt_time = 0;
258 if (callee)
259 callee->remove_symbol_and_inline_clones ();
260 return e;
261 }
262
263 /* Set predicate for edge E. */
264
265 static void
edge_set_predicate(struct cgraph_edge * e,predicate * predicate)266 edge_set_predicate (struct cgraph_edge *e, predicate *predicate)
267 {
268 /* If the edge is determined to be never executed, redirect it
269 to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will
270 be optimized out. */
271 if (predicate && *predicate == false
272 /* When handling speculative edges, we need to do the redirection
273 just once. Do it always on the direct edge, so we do not
274 attempt to resolve speculation while duplicating the edge. */
275 && (!e->speculative || e->callee))
276 e = redirect_to_unreachable (e);
277
278 class ipa_call_summary *es = ipa_call_summaries->get (e);
279 if (predicate && *predicate != true)
280 {
281 if (!es->predicate)
282 es->predicate = edge_predicate_pool.allocate ();
283 *es->predicate = *predicate;
284 }
285 else
286 {
287 if (es->predicate)
288 edge_predicate_pool.remove (es->predicate);
289 es->predicate = NULL;
290 }
291 }
292
293 /* Set predicate for hint *P. */
294
295 static void
set_hint_predicate(predicate ** p,predicate new_predicate)296 set_hint_predicate (predicate **p, predicate new_predicate)
297 {
298 if (new_predicate == false || new_predicate == true)
299 {
300 if (*p)
301 edge_predicate_pool.remove (*p);
302 *p = NULL;
303 }
304 else
305 {
306 if (!*p)
307 *p = edge_predicate_pool.allocate ();
308 **p = new_predicate;
309 }
310 }
311
312
313 /* Compute what conditions may or may not hold given information about
314 parameters. RET_CLAUSE returns truths that may hold in a specialized copy,
315 while RET_NONSPEC_CLAUSE returns truths that may hold in an nonspecialized
316 copy when called in a given context. It is a bitmask of conditions. Bit
317 0 means that condition is known to be false, while bit 1 means that condition
318 may or may not be true. These differs - for example NOT_INLINED condition
319 is always false in the second and also builtin_constant_p tests cannot use
320 the fact that parameter is indeed a constant.
321
322 KNOWN_VALS is partial mapping of parameters of NODE to constant values.
323 KNOWN_AGGS is a vector of aggregate known offset/value set for each
324 parameter. Return clause of possible truths. When INLINE_P is true, assume
325 that we are inlining.
326
327 ERROR_MARK means compile time invariant. */
328
329 static void
evaluate_conditions_for_known_args(struct cgraph_node * node,bool inline_p,vec<tree> known_vals,vec<value_range> known_value_ranges,vec<ipa_agg_value_set> known_aggs,clause_t * ret_clause,clause_t * ret_nonspec_clause)330 evaluate_conditions_for_known_args (struct cgraph_node *node,
331 bool inline_p,
332 vec<tree> known_vals,
333 vec<value_range> known_value_ranges,
334 vec<ipa_agg_value_set> known_aggs,
335 clause_t *ret_clause,
336 clause_t *ret_nonspec_clause)
337 {
338 clause_t clause = inline_p ? 0 : 1 << predicate::not_inlined_condition;
339 clause_t nonspec_clause = 1 << predicate::not_inlined_condition;
340 class ipa_fn_summary *info = ipa_fn_summaries->get (node);
341 int i;
342 struct condition *c;
343
344 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
345 {
346 tree val = NULL;
347 tree res;
348 int j;
349 struct expr_eval_op *op;
350
351 /* We allow call stmt to have fewer arguments than the callee function
352 (especially for K&R style programs). So bound check here (we assume
353 known_aggs vector, if non-NULL, has the same length as
354 known_vals). */
355 gcc_checking_assert (!known_aggs.length () || !known_vals.length ()
356 || (known_vals.length () == known_aggs.length ()));
357
358 if (c->agg_contents)
359 {
360 struct ipa_agg_value_set *agg;
361
362 if (c->code == predicate::changed
363 && !c->by_ref
364 && c->operand_num < (int)known_vals.length ()
365 && (known_vals[c->operand_num] == error_mark_node))
366 continue;
367
368 if (c->operand_num < (int)known_aggs.length ())
369 {
370 agg = &known_aggs[c->operand_num];
371 val = ipa_find_agg_cst_for_param (agg,
372 c->operand_num
373 < (int) known_vals.length ()
374 ? known_vals[c->operand_num]
375 : NULL,
376 c->offset, c->by_ref);
377 }
378 else
379 val = NULL_TREE;
380 }
381 else if (c->operand_num < (int) known_vals.length ())
382 {
383 val = known_vals[c->operand_num];
384 if (val == error_mark_node && c->code != predicate::changed)
385 val = NULL_TREE;
386 }
387
388 if (!val
389 && (c->code == predicate::changed
390 || c->code == predicate::is_not_constant))
391 {
392 clause |= 1 << (i + predicate::first_dynamic_condition);
393 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
394 continue;
395 }
396 if (c->code == predicate::changed)
397 {
398 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
399 continue;
400 }
401
402 if (c->code == predicate::is_not_constant)
403 {
404 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
405 continue;
406 }
407
408 if (val && TYPE_SIZE (c->type) == TYPE_SIZE (TREE_TYPE (val)))
409 {
410 if (c->type != TREE_TYPE (val))
411 val = fold_unary (VIEW_CONVERT_EXPR, c->type, val);
412 for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
413 {
414 if (!val)
415 break;
416 if (!op->val[0])
417 val = fold_unary (op->code, op->type, val);
418 else if (!op->val[1])
419 val = fold_binary (op->code, op->type,
420 op->index ? op->val[0] : val,
421 op->index ? val : op->val[0]);
422 else if (op->index == 0)
423 val = fold_ternary (op->code, op->type,
424 val, op->val[0], op->val[1]);
425 else if (op->index == 1)
426 val = fold_ternary (op->code, op->type,
427 op->val[0], val, op->val[1]);
428 else if (op->index == 2)
429 val = fold_ternary (op->code, op->type,
430 op->val[0], op->val[1], val);
431 else
432 val = NULL_TREE;
433 }
434
435 res = val
436 ? fold_binary_to_constant (c->code, boolean_type_node, val, c->val)
437 : NULL;
438
439 if (res && integer_zerop (res))
440 continue;
441 if (res && integer_onep (res))
442 {
443 clause |= 1 << (i + predicate::first_dynamic_condition);
444 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
445 continue;
446 }
447 }
448 if (c->operand_num < (int) known_value_ranges.length ()
449 && !c->agg_contents
450 && !known_value_ranges[c->operand_num].undefined_p ()
451 && !known_value_ranges[c->operand_num].varying_p ()
452 && TYPE_SIZE (c->type)
453 == TYPE_SIZE (known_value_ranges[c->operand_num].type ())
454 && (!val || TREE_CODE (val) != INTEGER_CST))
455 {
456 value_range vr = known_value_ranges[c->operand_num];
457 if (!useless_type_conversion_p (c->type, vr.type ()))
458 {
459 value_range res;
460 range_fold_unary_expr (&res, NOP_EXPR,
461 c->type, &vr, vr.type ());
462 vr = res;
463 }
464 tree type = c->type;
465
466 for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
467 {
468 if (vr.varying_p () || vr.undefined_p ())
469 break;
470
471 value_range res;
472 if (!op->val[0])
473 range_fold_unary_expr (&res, op->code, op->type, &vr, type);
474 else if (!op->val[1])
475 {
476 value_range op0 (op->val[0], op->val[0]);
477 range_fold_binary_expr (&res, op->code, op->type,
478 op->index ? &op0 : &vr,
479 op->index ? &vr : &op0);
480 }
481 else
482 gcc_unreachable ();
483 type = op->type;
484 vr = res;
485 }
486 if (!vr.varying_p () && !vr.undefined_p ())
487 {
488 value_range res;
489 value_range val_vr (c->val, c->val);
490 range_fold_binary_expr (&res, c->code, boolean_type_node,
491 &vr,
492 &val_vr);
493 if (res.zero_p ())
494 continue;
495 }
496 }
497
498 clause |= 1 << (i + predicate::first_dynamic_condition);
499 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
500 }
501 *ret_clause = clause;
502 if (ret_nonspec_clause)
503 *ret_nonspec_clause = nonspec_clause;
504 }
505
506 /* Return true if VRP will be exectued on the function.
507 We do not want to anticipate optimizations that will not happen.
508
509 FIXME: This can be confused with -fdisable and debug counters and thus
510 it should not be used for correctness (only to make heuristics work).
511 This means that inliner should do its own optimizations of expressions
512 that it predicts to be constant so wrong code can not be triggered by
513 builtin_constant_p. */
514
515 static bool
vrp_will_run_p(struct cgraph_node * node)516 vrp_will_run_p (struct cgraph_node *node)
517 {
518 return (opt_for_fn (node->decl, optimize)
519 && !opt_for_fn (node->decl, optimize_debug)
520 && opt_for_fn (node->decl, flag_tree_vrp));
521 }
522
523 /* Similarly about FRE. */
524
525 static bool
fre_will_run_p(struct cgraph_node * node)526 fre_will_run_p (struct cgraph_node *node)
527 {
528 return (opt_for_fn (node->decl, optimize)
529 && !opt_for_fn (node->decl, optimize_debug)
530 && opt_for_fn (node->decl, flag_tree_fre));
531 }
532
533 /* Work out what conditions might be true at invocation of E.
534 Compute costs for inlined edge if INLINE_P is true.
535
536 Return in CLAUSE_PTR the evaluated conditions and in NONSPEC_CLAUSE_PTR
537 (if non-NULL) conditions evaluated for nonspecialized clone called
538 in a given context.
539
540 KNOWN_VALS_PTR and KNOWN_AGGS_PTR must be non-NULL and will be filled by
541 known constant and aggregate values of parameters.
542
543 KNOWN_CONTEXT_PTR, if non-NULL, will be filled by polymorphic call contexts
544 of parameter used by a polymorphic call. */
545
546 void
evaluate_properties_for_edge(struct cgraph_edge * e,bool inline_p,clause_t * clause_ptr,clause_t * nonspec_clause_ptr,vec<tree> * known_vals_ptr,vec<ipa_polymorphic_call_context> * known_contexts_ptr,vec<ipa_agg_value_set> * known_aggs_ptr)547 evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p,
548 clause_t *clause_ptr,
549 clause_t *nonspec_clause_ptr,
550 vec<tree> *known_vals_ptr,
551 vec<ipa_polymorphic_call_context>
552 *known_contexts_ptr,
553 vec<ipa_agg_value_set> *known_aggs_ptr)
554 {
555 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
556 class ipa_fn_summary *info = ipa_fn_summaries->get (callee);
557 auto_vec<value_range, 32> known_value_ranges;
558 class ipa_edge_args *args;
559
560 if (clause_ptr)
561 *clause_ptr = inline_p ? 0 : 1 << predicate::not_inlined_condition;
562
563 if (ipa_node_params_sum
564 && !e->call_stmt_cannot_inline_p
565 && (info->conds || known_contexts_ptr)
566 && (args = IPA_EDGE_REF (e)) != NULL)
567 {
568 struct cgraph_node *caller;
569 class ipa_node_params *caller_parms_info, *callee_pi = NULL;
570 class ipa_call_summary *es = ipa_call_summaries->get (e);
571 int i, count = ipa_get_cs_argument_count (args);
572
573 if (count)
574 {
575 if (e->caller->inlined_to)
576 caller = e->caller->inlined_to;
577 else
578 caller = e->caller;
579 caller_parms_info = IPA_NODE_REF (caller);
580 callee_pi = IPA_NODE_REF (callee);
581
582 /* Watch for thunks. */
583 if (callee_pi)
584 /* Watch for variadic functions. */
585 count = MIN (count, ipa_get_param_count (callee_pi));
586 }
587
588 if (callee_pi)
589 for (i = 0; i < count; i++)
590 {
591 struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i);
592
593 if (ipa_is_param_used_by_indirect_call (callee_pi, i)
594 || ipa_is_param_used_by_ipa_predicates (callee_pi, i))
595 {
596 /* Determine if we know constant value of the parameter. */
597 tree cst = ipa_value_from_jfunc (caller_parms_info, jf,
598 ipa_get_type (callee_pi, i));
599
600 if (!cst && e->call_stmt
601 && i < (int)gimple_call_num_args (e->call_stmt))
602 {
603 cst = gimple_call_arg (e->call_stmt, i);
604 if (!is_gimple_min_invariant (cst))
605 cst = NULL;
606 }
607 if (cst)
608 {
609 gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO);
610 if (!known_vals_ptr->length ())
611 vec_safe_grow_cleared (known_vals_ptr, count);
612 (*known_vals_ptr)[i] = cst;
613 }
614 else if (inline_p && !es->param[i].change_prob)
615 {
616 if (!known_vals_ptr->length ())
617 vec_safe_grow_cleared (known_vals_ptr, count);
618 (*known_vals_ptr)[i] = error_mark_node;
619 }
620
621 /* If we failed to get simple constant, try value range. */
622 if ((!cst || TREE_CODE (cst) != INTEGER_CST)
623 && vrp_will_run_p (caller)
624 && ipa_is_param_used_by_ipa_predicates (callee_pi, i))
625 {
626 value_range vr
627 = ipa_value_range_from_jfunc (caller_parms_info, e, jf,
628 ipa_get_type (callee_pi,
629 i));
630 if (!vr.undefined_p () && !vr.varying_p ())
631 {
632 if (!known_value_ranges.length ())
633 known_value_ranges.safe_grow_cleared (count);
634 known_value_ranges[i] = vr;
635 }
636 }
637
638 /* Determine known aggregate values. */
639 if (fre_will_run_p (caller))
640 {
641 ipa_agg_value_set agg
642 = ipa_agg_value_set_from_jfunc (caller_parms_info,
643 caller, &jf->agg);
644 if (agg.items.length ())
645 {
646 if (!known_aggs_ptr->length ())
647 vec_safe_grow_cleared (known_aggs_ptr, count);
648 (*known_aggs_ptr)[i] = agg;
649 }
650 }
651 }
652
653 /* For calls used in polymorphic calls we further determine
654 polymorphic call context. */
655 if (known_contexts_ptr
656 && ipa_is_param_used_by_polymorphic_call (callee_pi, i))
657 {
658 ipa_polymorphic_call_context
659 ctx = ipa_context_from_jfunc (caller_parms_info, e, i, jf);
660 if (!ctx.useless_p ())
661 {
662 if (!known_contexts_ptr->length ())
663 known_contexts_ptr->safe_grow_cleared (count);
664 (*known_contexts_ptr)[i]
665 = ipa_context_from_jfunc (caller_parms_info, e, i, jf);
666 }
667 }
668 }
669 else
670 gcc_assert (!count || callee->thunk.thunk_p);
671 }
672 else if (e->call_stmt && !e->call_stmt_cannot_inline_p && info->conds)
673 {
674 int i, count = (int)gimple_call_num_args (e->call_stmt);
675
676 for (i = 0; i < count; i++)
677 {
678 tree cst = gimple_call_arg (e->call_stmt, i);
679 if (!is_gimple_min_invariant (cst))
680 cst = NULL;
681 if (cst)
682 {
683 if (!known_vals_ptr->length ())
684 vec_safe_grow_cleared (known_vals_ptr, count);
685 (*known_vals_ptr)[i] = cst;
686 }
687 }
688 }
689
690 evaluate_conditions_for_known_args (callee, inline_p,
691 *known_vals_ptr,
692 known_value_ranges,
693 *known_aggs_ptr,
694 clause_ptr,
695 nonspec_clause_ptr);
696 }
697
698
699 /* Allocate the function summary. */
700
701 static void
ipa_fn_summary_alloc(void)702 ipa_fn_summary_alloc (void)
703 {
704 gcc_checking_assert (!ipa_fn_summaries);
705 ipa_size_summaries = new ipa_size_summary_t (symtab);
706 ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab);
707 ipa_call_summaries = new ipa_call_summary_t (symtab);
708 }
709
~ipa_call_summary()710 ipa_call_summary::~ipa_call_summary ()
711 {
712 if (predicate)
713 edge_predicate_pool.remove (predicate);
714
715 param.release ();
716 }
717
~ipa_fn_summary()718 ipa_fn_summary::~ipa_fn_summary ()
719 {
720 if (loop_iterations)
721 edge_predicate_pool.remove (loop_iterations);
722 if (loop_stride)
723 edge_predicate_pool.remove (loop_stride);
724 vec_free (conds);
725 vec_free (size_time_table);
726 vec_free (call_size_time_table);
727 }
728
729 void
remove_callees(cgraph_node * node)730 ipa_fn_summary_t::remove_callees (cgraph_node *node)
731 {
732 cgraph_edge *e;
733 for (e = node->callees; e; e = e->next_callee)
734 ipa_call_summaries->remove (e);
735 for (e = node->indirect_calls; e; e = e->next_callee)
736 ipa_call_summaries->remove (e);
737 }
738
739 /* Same as remap_predicate_after_duplication but handle hint predicate *P.
740 Additionally care about allocating new memory slot for updated predicate
741 and set it to NULL when it becomes true or false (and thus uninteresting).
742 */
743
744 static void
remap_hint_predicate_after_duplication(predicate ** p,clause_t possible_truths)745 remap_hint_predicate_after_duplication (predicate **p,
746 clause_t possible_truths)
747 {
748 predicate new_predicate;
749
750 if (!*p)
751 return;
752
753 new_predicate = (*p)->remap_after_duplication (possible_truths);
754 /* We do not want to free previous predicate; it is used by node origin. */
755 *p = NULL;
756 set_hint_predicate (p, new_predicate);
757 }
758
759
760 /* Hook that is called by cgraph.c when a node is duplicated. */
761 void
duplicate(cgraph_node * src,cgraph_node * dst,ipa_fn_summary *,ipa_fn_summary * info)762 ipa_fn_summary_t::duplicate (cgraph_node *src,
763 cgraph_node *dst,
764 ipa_fn_summary *,
765 ipa_fn_summary *info)
766 {
767 new (info) ipa_fn_summary (*ipa_fn_summaries->get (src));
768 /* TODO: as an optimization, we may avoid copying conditions
769 that are known to be false or true. */
770 info->conds = vec_safe_copy (info->conds);
771
772 /* When there are any replacements in the function body, see if we can figure
773 out that something was optimized out. */
774 if (ipa_node_params_sum && dst->clone.tree_map)
775 {
776 vec<size_time_entry, va_gc> *entry = info->size_time_table;
777 /* Use SRC parm info since it may not be copied yet. */
778 class ipa_node_params *parms_info = IPA_NODE_REF (src);
779 vec<tree> known_vals = vNULL;
780 int count = ipa_get_param_count (parms_info);
781 int i, j;
782 clause_t possible_truths;
783 predicate true_pred = true;
784 size_time_entry *e;
785 int optimized_out_size = 0;
786 bool inlined_to_p = false;
787 struct cgraph_edge *edge, *next;
788
789 info->size_time_table = 0;
790 known_vals.safe_grow_cleared (count);
791 for (i = 0; i < count; i++)
792 {
793 struct ipa_replace_map *r;
794
795 for (j = 0; vec_safe_iterate (dst->clone.tree_map, j, &r); j++)
796 {
797 if (r->parm_num == i)
798 {
799 known_vals[i] = r->new_tree;
800 break;
801 }
802 }
803 }
804 evaluate_conditions_for_known_args (dst, false,
805 known_vals,
806 vNULL,
807 vNULL,
808 &possible_truths,
809 /* We are going to specialize,
810 so ignore nonspec truths. */
811 NULL);
812 known_vals.release ();
813
814 info->account_size_time (0, 0, true_pred, true_pred);
815
816 /* Remap size_time vectors.
817 Simplify the predicate by pruning out alternatives that are known
818 to be false.
819 TODO: as on optimization, we can also eliminate conditions known
820 to be true. */
821 for (i = 0; vec_safe_iterate (entry, i, &e); i++)
822 {
823 predicate new_exec_pred;
824 predicate new_nonconst_pred;
825 new_exec_pred = e->exec_predicate.remap_after_duplication
826 (possible_truths);
827 new_nonconst_pred = e->nonconst_predicate.remap_after_duplication
828 (possible_truths);
829 if (new_exec_pred == false || new_nonconst_pred == false)
830 optimized_out_size += e->size;
831 else
832 info->account_size_time (e->size, e->time, new_exec_pred,
833 new_nonconst_pred);
834 }
835
836 /* Remap edge predicates with the same simplification as above.
837 Also copy constantness arrays. */
838 for (edge = dst->callees; edge; edge = next)
839 {
840 predicate new_predicate;
841 class ipa_call_summary *es = ipa_call_summaries->get (edge);
842 next = edge->next_callee;
843
844 if (!edge->inline_failed)
845 inlined_to_p = true;
846 if (!es->predicate)
847 continue;
848 new_predicate = es->predicate->remap_after_duplication
849 (possible_truths);
850 if (new_predicate == false && *es->predicate != false)
851 optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
852 edge_set_predicate (edge, &new_predicate);
853 }
854
855 /* Remap indirect edge predicates with the same simplification as above.
856 Also copy constantness arrays. */
857 for (edge = dst->indirect_calls; edge; edge = next)
858 {
859 predicate new_predicate;
860 class ipa_call_summary *es = ipa_call_summaries->get (edge);
861 next = edge->next_callee;
862
863 gcc_checking_assert (edge->inline_failed);
864 if (!es->predicate)
865 continue;
866 new_predicate = es->predicate->remap_after_duplication
867 (possible_truths);
868 if (new_predicate == false && *es->predicate != false)
869 optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
870 edge_set_predicate (edge, &new_predicate);
871 }
872 remap_hint_predicate_after_duplication (&info->loop_iterations,
873 possible_truths);
874 remap_hint_predicate_after_duplication (&info->loop_stride,
875 possible_truths);
876
877 /* If inliner or someone after inliner will ever start producing
878 non-trivial clones, we will get trouble with lack of information
879 about updating self sizes, because size vectors already contains
880 sizes of the callees. */
881 gcc_assert (!inlined_to_p || !optimized_out_size);
882 }
883 else
884 {
885 info->size_time_table = vec_safe_copy (info->size_time_table);
886 if (info->loop_iterations)
887 {
888 predicate p = *info->loop_iterations;
889 info->loop_iterations = NULL;
890 set_hint_predicate (&info->loop_iterations, p);
891 }
892 if (info->loop_stride)
893 {
894 predicate p = *info->loop_stride;
895 info->loop_stride = NULL;
896 set_hint_predicate (&info->loop_stride, p);
897 }
898 }
899 if (!dst->inlined_to)
900 ipa_update_overall_fn_summary (dst);
901 }
902
903
904 /* Hook that is called by cgraph.c when a node is duplicated. */
905
906 void
duplicate(struct cgraph_edge * src,struct cgraph_edge * dst,class ipa_call_summary * srcinfo,class ipa_call_summary * info)907 ipa_call_summary_t::duplicate (struct cgraph_edge *src,
908 struct cgraph_edge *dst,
909 class ipa_call_summary *srcinfo,
910 class ipa_call_summary *info)
911 {
912 new (info) ipa_call_summary (*srcinfo);
913 info->predicate = NULL;
914 edge_set_predicate (dst, srcinfo->predicate);
915 info->param = srcinfo->param.copy ();
916 if (!dst->indirect_unknown_callee && src->indirect_unknown_callee)
917 {
918 info->call_stmt_size -= (eni_size_weights.indirect_call_cost
919 - eni_size_weights.call_cost);
920 info->call_stmt_time -= (eni_time_weights.indirect_call_cost
921 - eni_time_weights.call_cost);
922 }
923 }
924
925 /* Dump edge summaries associated to NODE and recursively to all clones.
926 Indent by INDENT. */
927
928 static void
dump_ipa_call_summary(FILE * f,int indent,struct cgraph_node * node,class ipa_fn_summary * info)929 dump_ipa_call_summary (FILE *f, int indent, struct cgraph_node *node,
930 class ipa_fn_summary *info)
931 {
932 struct cgraph_edge *edge;
933 for (edge = node->callees; edge; edge = edge->next_callee)
934 {
935 class ipa_call_summary *es = ipa_call_summaries->get (edge);
936 struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
937 int i;
938
939 fprintf (f,
940 "%*s%s %s\n%*s freq:%4.2f",
941 indent, "", callee->dump_name (),
942 !edge->inline_failed
943 ? "inlined" : cgraph_inline_failed_string (edge-> inline_failed),
944 indent, "", edge->sreal_frequency ().to_double ());
945
946 if (cross_module_call_p (edge))
947 fprintf (f, " cross module");
948
949 if (es)
950 fprintf (f, " loop depth:%2i size:%2i time: %2i",
951 es->loop_depth, es->call_stmt_size, es->call_stmt_time);
952
953 ipa_fn_summary *s = ipa_fn_summaries->get (callee);
954 ipa_size_summary *ss = ipa_size_summaries->get (callee);
955 if (s != NULL)
956 fprintf (f, " callee size:%2i stack:%2i",
957 (int) (ss->size / ipa_fn_summary::size_scale),
958 (int) s->estimated_stack_size);
959
960 if (es && es->predicate)
961 {
962 fprintf (f, " predicate: ");
963 es->predicate->dump (f, info->conds);
964 }
965 else
966 fprintf (f, "\n");
967 if (es && es->param.exists ())
968 for (i = 0; i < (int) es->param.length (); i++)
969 {
970 int prob = es->param[i].change_prob;
971
972 if (!prob)
973 fprintf (f, "%*s op%i is compile time invariant\n",
974 indent + 2, "", i);
975 else if (prob != REG_BR_PROB_BASE)
976 fprintf (f, "%*s op%i change %f%% of time\n", indent + 2, "", i,
977 prob * 100.0 / REG_BR_PROB_BASE);
978 }
979 if (!edge->inline_failed)
980 {
981 ipa_size_summary *ss = ipa_size_summaries->get (callee);
982 fprintf (f, "%*sStack frame offset %i, callee self size %i\n",
983 indent + 2, "",
984 (int) ipa_get_stack_frame_offset (callee),
985 (int) ss->estimated_self_stack_size);
986 dump_ipa_call_summary (f, indent + 2, callee, info);
987 }
988 }
989 for (edge = node->indirect_calls; edge; edge = edge->next_callee)
990 {
991 class ipa_call_summary *es = ipa_call_summaries->get (edge);
992 fprintf (f, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i"
993 " time: %2i",
994 indent, "",
995 es->loop_depth,
996 edge->sreal_frequency ().to_double (), es->call_stmt_size,
997 es->call_stmt_time);
998 if (es->predicate)
999 {
1000 fprintf (f, "predicate: ");
1001 es->predicate->dump (f, info->conds);
1002 }
1003 else
1004 fprintf (f, "\n");
1005 }
1006 }
1007
1008
1009 void
ipa_dump_fn_summary(FILE * f,struct cgraph_node * node)1010 ipa_dump_fn_summary (FILE *f, struct cgraph_node *node)
1011 {
1012 if (node->definition)
1013 {
1014 class ipa_fn_summary *s = ipa_fn_summaries->get (node);
1015 class ipa_size_summary *ss = ipa_size_summaries->get (node);
1016 if (s != NULL)
1017 {
1018 size_time_entry *e;
1019 int i;
1020 fprintf (f, "IPA function summary for %s", node->dump_name ());
1021 if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
1022 fprintf (f, " always_inline");
1023 if (s->inlinable)
1024 fprintf (f, " inlinable");
1025 if (s->fp_expressions)
1026 fprintf (f, " fp_expression");
1027 fprintf (f, "\n global time: %f\n", s->time.to_double ());
1028 fprintf (f, " self size: %i\n", ss->self_size);
1029 fprintf (f, " global size: %i\n", ss->size);
1030 fprintf (f, " min size: %i\n", s->min_size);
1031 fprintf (f, " self stack: %i\n",
1032 (int) ss->estimated_self_stack_size);
1033 fprintf (f, " global stack: %i\n", (int) s->estimated_stack_size);
1034 if (s->growth)
1035 fprintf (f, " estimated growth:%i\n", (int) s->growth);
1036 if (s->scc_no)
1037 fprintf (f, " In SCC: %i\n", (int) s->scc_no);
1038 for (i = 0; vec_safe_iterate (s->size_time_table, i, &e); i++)
1039 {
1040 fprintf (f, " size:%f, time:%f",
1041 (double) e->size / ipa_fn_summary::size_scale,
1042 e->time.to_double ());
1043 if (e->exec_predicate != true)
1044 {
1045 fprintf (f, ", executed if:");
1046 e->exec_predicate.dump (f, s->conds, 0);
1047 }
1048 if (e->exec_predicate != e->nonconst_predicate)
1049 {
1050 fprintf (f, ", nonconst if:");
1051 e->nonconst_predicate.dump (f, s->conds, 0);
1052 }
1053 fprintf (f, "\n");
1054 }
1055 if (s->loop_iterations)
1056 {
1057 fprintf (f, " loop iterations:");
1058 s->loop_iterations->dump (f, s->conds);
1059 }
1060 if (s->loop_stride)
1061 {
1062 fprintf (f, " loop stride:");
1063 s->loop_stride->dump (f, s->conds);
1064 }
1065 fprintf (f, " calls:\n");
1066 dump_ipa_call_summary (f, 4, node, s);
1067 fprintf (f, "\n");
1068 }
1069 else
1070 fprintf (f, "IPA summary for %s is missing.\n", node->dump_name ());
1071 }
1072 }
1073
1074 DEBUG_FUNCTION void
ipa_debug_fn_summary(struct cgraph_node * node)1075 ipa_debug_fn_summary (struct cgraph_node *node)
1076 {
1077 ipa_dump_fn_summary (stderr, node);
1078 }
1079
1080 void
ipa_dump_fn_summaries(FILE * f)1081 ipa_dump_fn_summaries (FILE *f)
1082 {
1083 struct cgraph_node *node;
1084
1085 FOR_EACH_DEFINED_FUNCTION (node)
1086 if (!node->inlined_to)
1087 ipa_dump_fn_summary (f, node);
1088 }
1089
1090 /* Callback of walk_aliased_vdefs. Flags that it has been invoked to the
1091 boolean variable pointed to by DATA. */
1092
1093 static bool
mark_modified(ao_ref * ao ATTRIBUTE_UNUSED,tree vdef ATTRIBUTE_UNUSED,void * data)1094 mark_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef ATTRIBUTE_UNUSED,
1095 void *data)
1096 {
1097 bool *b = (bool *) data;
1098 *b = true;
1099 return true;
1100 }
1101
1102 /* If OP refers to value of function parameter, return the corresponding
1103 parameter. If non-NULL, the size of the memory load (or the SSA_NAME of the
1104 PARM_DECL) will be stored to *SIZE_P in that case too. */
1105
1106 static tree
unmodified_parm_1(ipa_func_body_info * fbi,gimple * stmt,tree op,poly_int64 * size_p)1107 unmodified_parm_1 (ipa_func_body_info *fbi, gimple *stmt, tree op,
1108 poly_int64 *size_p)
1109 {
1110 /* SSA_NAME referring to parm default def? */
1111 if (TREE_CODE (op) == SSA_NAME
1112 && SSA_NAME_IS_DEFAULT_DEF (op)
1113 && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL)
1114 {
1115 if (size_p)
1116 *size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op)));
1117 return SSA_NAME_VAR (op);
1118 }
1119 /* Non-SSA parm reference? */
1120 if (TREE_CODE (op) == PARM_DECL)
1121 {
1122 bool modified = false;
1123
1124 ao_ref refd;
1125 ao_ref_init (&refd, op);
1126 int walked = walk_aliased_vdefs (&refd, gimple_vuse (stmt),
1127 mark_modified, &modified, NULL, NULL,
1128 fbi->aa_walk_budget + 1);
1129 if (walked < 0)
1130 {
1131 fbi->aa_walk_budget = 0;
1132 return NULL_TREE;
1133 }
1134 if (!modified)
1135 {
1136 if (size_p)
1137 *size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op)));
1138 return op;
1139 }
1140 }
1141 return NULL_TREE;
1142 }
1143
1144 /* If OP refers to value of function parameter, return the corresponding
1145 parameter. Also traverse chains of SSA register assignments. If non-NULL,
1146 the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
1147 stored to *SIZE_P in that case too. */
1148
1149 static tree
unmodified_parm(ipa_func_body_info * fbi,gimple * stmt,tree op,poly_int64 * size_p)1150 unmodified_parm (ipa_func_body_info *fbi, gimple *stmt, tree op,
1151 poly_int64 *size_p)
1152 {
1153 tree res = unmodified_parm_1 (fbi, stmt, op, size_p);
1154 if (res)
1155 return res;
1156
1157 if (TREE_CODE (op) == SSA_NAME
1158 && !SSA_NAME_IS_DEFAULT_DEF (op)
1159 && gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
1160 return unmodified_parm (fbi, SSA_NAME_DEF_STMT (op),
1161 gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op)),
1162 size_p);
1163 return NULL_TREE;
1164 }
1165
1166 /* If OP refers to a value of a function parameter or value loaded from an
1167 aggregate passed to a parameter (either by value or reference), return TRUE
1168 and store the number of the parameter to *INDEX_P, the access size into
1169 *SIZE_P, and information whether and how it has been loaded from an
1170 aggregate into *AGGPOS. INFO describes the function parameters, STMT is the
1171 statement in which OP is used or loaded. */
1172
1173 static bool
unmodified_parm_or_parm_agg_item(struct ipa_func_body_info * fbi,gimple * stmt,tree op,int * index_p,poly_int64 * size_p,struct agg_position_info * aggpos)1174 unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi,
1175 gimple *stmt, tree op, int *index_p,
1176 poly_int64 *size_p,
1177 struct agg_position_info *aggpos)
1178 {
1179 tree res = unmodified_parm_1 (fbi, stmt, op, size_p);
1180
1181 gcc_checking_assert (aggpos);
1182 if (res)
1183 {
1184 *index_p = ipa_get_param_decl_index (fbi->info, res);
1185 if (*index_p < 0)
1186 return false;
1187 aggpos->agg_contents = false;
1188 aggpos->by_ref = false;
1189 return true;
1190 }
1191
1192 if (TREE_CODE (op) == SSA_NAME)
1193 {
1194 if (SSA_NAME_IS_DEFAULT_DEF (op)
1195 || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
1196 return false;
1197 stmt = SSA_NAME_DEF_STMT (op);
1198 op = gimple_assign_rhs1 (stmt);
1199 if (!REFERENCE_CLASS_P (op))
1200 return unmodified_parm_or_parm_agg_item (fbi, stmt, op, index_p, size_p,
1201 aggpos);
1202 }
1203
1204 aggpos->agg_contents = true;
1205 return ipa_load_from_parm_agg (fbi, fbi->info->descriptors,
1206 stmt, op, index_p, &aggpos->offset,
1207 size_p, &aggpos->by_ref);
1208 }
1209
1210 /* See if statement might disappear after inlining.
1211 0 - means not eliminated
1212 1 - half of statements goes away
1213 2 - for sure it is eliminated.
1214 We are not terribly sophisticated, basically looking for simple abstraction
1215 penalty wrappers. */
1216
1217 static int
eliminated_by_inlining_prob(ipa_func_body_info * fbi,gimple * stmt)1218 eliminated_by_inlining_prob (ipa_func_body_info *fbi, gimple *stmt)
1219 {
1220 enum gimple_code code = gimple_code (stmt);
1221 enum tree_code rhs_code;
1222
1223 if (!optimize)
1224 return 0;
1225
1226 switch (code)
1227 {
1228 case GIMPLE_RETURN:
1229 return 2;
1230 case GIMPLE_ASSIGN:
1231 if (gimple_num_ops (stmt) != 2)
1232 return 0;
1233
1234 rhs_code = gimple_assign_rhs_code (stmt);
1235
1236 /* Casts of parameters, loads from parameters passed by reference
1237 and stores to return value or parameters are often free after
1238 inlining due to SRA and further combining.
1239 Assume that half of statements goes away. */
1240 if (CONVERT_EXPR_CODE_P (rhs_code)
1241 || rhs_code == VIEW_CONVERT_EXPR
1242 || rhs_code == ADDR_EXPR
1243 || gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS)
1244 {
1245 tree rhs = gimple_assign_rhs1 (stmt);
1246 tree lhs = gimple_assign_lhs (stmt);
1247 tree inner_rhs = get_base_address (rhs);
1248 tree inner_lhs = get_base_address (lhs);
1249 bool rhs_free = false;
1250 bool lhs_free = false;
1251
1252 if (!inner_rhs)
1253 inner_rhs = rhs;
1254 if (!inner_lhs)
1255 inner_lhs = lhs;
1256
1257 /* Reads of parameter are expected to be free. */
1258 if (unmodified_parm (fbi, stmt, inner_rhs, NULL))
1259 rhs_free = true;
1260 /* Match expressions of form &this->field. Those will most likely
1261 combine with something upstream after inlining. */
1262 else if (TREE_CODE (inner_rhs) == ADDR_EXPR)
1263 {
1264 tree op = get_base_address (TREE_OPERAND (inner_rhs, 0));
1265 if (TREE_CODE (op) == PARM_DECL)
1266 rhs_free = true;
1267 else if (TREE_CODE (op) == MEM_REF
1268 && unmodified_parm (fbi, stmt, TREE_OPERAND (op, 0),
1269 NULL))
1270 rhs_free = true;
1271 }
1272
1273 /* When parameter is not SSA register because its address is taken
1274 and it is just copied into one, the statement will be completely
1275 free after inlining (we will copy propagate backward). */
1276 if (rhs_free && is_gimple_reg (lhs))
1277 return 2;
1278
1279 /* Reads of parameters passed by reference
1280 expected to be free (i.e. optimized out after inlining). */
1281 if (TREE_CODE (inner_rhs) == MEM_REF
1282 && unmodified_parm (fbi, stmt, TREE_OPERAND (inner_rhs, 0), NULL))
1283 rhs_free = true;
1284
1285 /* Copying parameter passed by reference into gimple register is
1286 probably also going to copy propagate, but we can't be quite
1287 sure. */
1288 if (rhs_free && is_gimple_reg (lhs))
1289 lhs_free = true;
1290
1291 /* Writes to parameters, parameters passed by value and return value
1292 (either directly or passed via invisible reference) are free.
1293
1294 TODO: We ought to handle testcase like
1295 struct a {int a,b;};
1296 struct a
1297 returnstruct (void)
1298 {
1299 struct a a ={1,2};
1300 return a;
1301 }
1302
1303 This translate into:
1304
1305 returnstruct ()
1306 {
1307 int a$b;
1308 int a$a;
1309 struct a a;
1310 struct a D.2739;
1311
1312 <bb 2>:
1313 D.2739.a = 1;
1314 D.2739.b = 2;
1315 return D.2739;
1316
1317 }
1318 For that we either need to copy ipa-split logic detecting writes
1319 to return value. */
1320 if (TREE_CODE (inner_lhs) == PARM_DECL
1321 || TREE_CODE (inner_lhs) == RESULT_DECL
1322 || (TREE_CODE (inner_lhs) == MEM_REF
1323 && (unmodified_parm (fbi, stmt, TREE_OPERAND (inner_lhs, 0),
1324 NULL)
1325 || (TREE_CODE (TREE_OPERAND (inner_lhs, 0)) == SSA_NAME
1326 && SSA_NAME_VAR (TREE_OPERAND (inner_lhs, 0))
1327 && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
1328 (inner_lhs,
1329 0))) == RESULT_DECL))))
1330 lhs_free = true;
1331 if (lhs_free
1332 && (is_gimple_reg (rhs) || is_gimple_min_invariant (rhs)))
1333 rhs_free = true;
1334 if (lhs_free && rhs_free)
1335 return 1;
1336 }
1337 return 0;
1338 default:
1339 return 0;
1340 }
1341 }
1342
1343 /* Analyze EXPR if it represents a series of simple operations performed on
1344 a function parameter and return true if so. FBI, STMT, EXPR, INDEX_P and
1345 AGGPOS have the same meaning like in unmodified_parm_or_parm_agg_item.
1346 Type of the parameter or load from an aggregate via the parameter is
1347 stored in *TYPE_P. Operations on the parameter are recorded to
1348 PARAM_OPS_P if it is not NULL. */
1349
1350 static bool
1351 decompose_param_expr (struct ipa_func_body_info *fbi,
1352 gimple *stmt, tree expr,
1353 int *index_p, tree *type_p,
1354 struct agg_position_info *aggpos,
1355 expr_eval_ops *param_ops_p = NULL)
1356 {
1357 int op_limit = opt_for_fn (fbi->node->decl, param_ipa_max_param_expr_ops);
1358 int op_count = 0;
1359
1360 if (param_ops_p)
1361 *param_ops_p = NULL;
1362
1363 while (true)
1364 {
1365 expr_eval_op eval_op;
1366 unsigned rhs_count;
1367 unsigned cst_count = 0;
1368
1369 if (unmodified_parm_or_parm_agg_item (fbi, stmt, expr, index_p, NULL,
1370 aggpos))
1371 {
1372 tree type = TREE_TYPE (expr);
1373
1374 if (aggpos->agg_contents)
1375 {
1376 /* Stop if containing bit-field. */
1377 if (TREE_CODE (expr) == BIT_FIELD_REF
1378 || contains_bitfld_component_ref_p (expr))
1379 break;
1380 }
1381
1382 *type_p = type;
1383 return true;
1384 }
1385
1386 if (TREE_CODE (expr) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (expr))
1387 break;
1388
1389 if (!is_gimple_assign (stmt = SSA_NAME_DEF_STMT (expr)))
1390 break;
1391
1392 switch (gimple_assign_rhs_class (stmt))
1393 {
1394 case GIMPLE_SINGLE_RHS:
1395 expr = gimple_assign_rhs1 (stmt);
1396 continue;
1397
1398 case GIMPLE_UNARY_RHS:
1399 rhs_count = 1;
1400 break;
1401
1402 case GIMPLE_BINARY_RHS:
1403 rhs_count = 2;
1404 break;
1405
1406 case GIMPLE_TERNARY_RHS:
1407 rhs_count = 3;
1408 break;
1409
1410 default:
1411 goto fail;
1412 }
1413
1414 /* Stop if expression is too complex. */
1415 if (op_count++ == op_limit)
1416 break;
1417
1418 if (param_ops_p)
1419 {
1420 eval_op.code = gimple_assign_rhs_code (stmt);
1421 eval_op.type = TREE_TYPE (gimple_assign_lhs (stmt));
1422 eval_op.val[0] = NULL_TREE;
1423 eval_op.val[1] = NULL_TREE;
1424 }
1425
1426 expr = NULL_TREE;
1427 for (unsigned i = 0; i < rhs_count; i++)
1428 {
1429 tree op = gimple_op (stmt, i + 1);
1430
1431 gcc_assert (op && !TYPE_P (op));
1432 if (is_gimple_ip_invariant (op))
1433 {
1434 if (++cst_count == rhs_count)
1435 goto fail;
1436
1437 eval_op.val[cst_count - 1] = op;
1438 }
1439 else if (!expr)
1440 {
1441 /* Found a non-constant operand, and record its index in rhs
1442 operands. */
1443 eval_op.index = i;
1444 expr = op;
1445 }
1446 else
1447 {
1448 /* Found more than one non-constant operands. */
1449 goto fail;
1450 }
1451 }
1452
1453 if (param_ops_p)
1454 vec_safe_insert (*param_ops_p, 0, eval_op);
1455 }
1456
1457 /* Failed to decompose, free resource and return. */
1458 fail:
1459 if (param_ops_p)
1460 vec_free (*param_ops_p);
1461
1462 return false;
1463 }
1464
1465 /* If BB ends by a conditional we can turn into predicates, attach corresponding
1466 predicates to the CFG edges. */
1467
1468 static void
set_cond_stmt_execution_predicate(struct ipa_func_body_info * fbi,class ipa_fn_summary * summary,class ipa_node_params * params_summary,basic_block bb)1469 set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi,
1470 class ipa_fn_summary *summary,
1471 class ipa_node_params *params_summary,
1472 basic_block bb)
1473 {
1474 gimple *last;
1475 tree op, op2;
1476 int index;
1477 struct agg_position_info aggpos;
1478 enum tree_code code, inverted_code;
1479 edge e;
1480 edge_iterator ei;
1481 gimple *set_stmt;
1482 tree param_type;
1483 expr_eval_ops param_ops;
1484
1485 last = last_stmt (bb);
1486 if (!last || gimple_code (last) != GIMPLE_COND)
1487 return;
1488 if (!is_gimple_ip_invariant (gimple_cond_rhs (last)))
1489 return;
1490 op = gimple_cond_lhs (last);
1491
1492 if (decompose_param_expr (fbi, last, op, &index, ¶m_type, &aggpos,
1493 ¶m_ops))
1494 {
1495 code = gimple_cond_code (last);
1496 inverted_code = invert_tree_comparison (code, HONOR_NANS (op));
1497
1498 FOR_EACH_EDGE (e, ei, bb->succs)
1499 {
1500 enum tree_code this_code = (e->flags & EDGE_TRUE_VALUE
1501 ? code : inverted_code);
1502 /* invert_tree_comparison will return ERROR_MARK on FP
1503 comparisons that are not EQ/NE instead of returning proper
1504 unordered one. Be sure it is not confused with NON_CONSTANT.
1505
1506 And if the edge's target is the final block of diamond CFG graph
1507 of this conditional statement, we do not need to compute
1508 predicate for the edge because the final block's predicate must
1509 be at least as that of the first block of the statement. */
1510 if (this_code != ERROR_MARK
1511 && !dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
1512 {
1513 predicate p
1514 = add_condition (summary, params_summary, index,
1515 param_type, &aggpos,
1516 this_code, gimple_cond_rhs (last), param_ops);
1517 e->aux = edge_predicate_pool.allocate ();
1518 *(predicate *) e->aux = p;
1519 }
1520 }
1521 vec_free (param_ops);
1522 }
1523
1524 if (TREE_CODE (op) != SSA_NAME)
1525 return;
1526 /* Special case
1527 if (builtin_constant_p (op))
1528 constant_code
1529 else
1530 nonconstant_code.
1531 Here we can predicate nonconstant_code. We can't
1532 really handle constant_code since we have no predicate
1533 for this and also the constant code is not known to be
1534 optimized away when inliner doesn't see operand is constant.
1535 Other optimizers might think otherwise. */
1536 if (gimple_cond_code (last) != NE_EXPR
1537 || !integer_zerop (gimple_cond_rhs (last)))
1538 return;
1539 set_stmt = SSA_NAME_DEF_STMT (op);
1540 if (!gimple_call_builtin_p (set_stmt, BUILT_IN_CONSTANT_P)
1541 || gimple_call_num_args (set_stmt) != 1)
1542 return;
1543 op2 = gimple_call_arg (set_stmt, 0);
1544 if (!decompose_param_expr (fbi, set_stmt, op2, &index, ¶m_type, &aggpos))
1545 return;
1546 FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE)
1547 {
1548 predicate p = add_condition (summary, params_summary, index,
1549 param_type, &aggpos,
1550 predicate::is_not_constant, NULL_TREE);
1551 e->aux = edge_predicate_pool.allocate ();
1552 *(predicate *) e->aux = p;
1553 }
1554 }
1555
1556
1557 /* If BB ends by a switch we can turn into predicates, attach corresponding
1558 predicates to the CFG edges. */
1559
1560 static void
set_switch_stmt_execution_predicate(struct ipa_func_body_info * fbi,class ipa_fn_summary * summary,class ipa_node_params * params_summary,basic_block bb)1561 set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
1562 class ipa_fn_summary *summary,
1563 class ipa_node_params *params_summary,
1564 basic_block bb)
1565 {
1566 gimple *lastg;
1567 tree op;
1568 int index;
1569 struct agg_position_info aggpos;
1570 edge e;
1571 edge_iterator ei;
1572 size_t n;
1573 size_t case_idx;
1574 tree param_type;
1575 expr_eval_ops param_ops;
1576
1577 lastg = last_stmt (bb);
1578 if (!lastg || gimple_code (lastg) != GIMPLE_SWITCH)
1579 return;
1580 gswitch *last = as_a <gswitch *> (lastg);
1581 op = gimple_switch_index (last);
1582 if (!decompose_param_expr (fbi, last, op, &index, ¶m_type, &aggpos,
1583 ¶m_ops))
1584 return;
1585
1586 auto_vec<std::pair<tree, tree> > ranges;
1587 tree type = TREE_TYPE (op);
1588 int bound_limit = opt_for_fn (fbi->node->decl,
1589 param_ipa_max_switch_predicate_bounds);
1590 int bound_count = 0;
1591 wide_int vr_wmin, vr_wmax;
1592 value_range_kind vr_type = get_range_info (op, &vr_wmin, &vr_wmax);
1593
1594 FOR_EACH_EDGE (e, ei, bb->succs)
1595 {
1596 e->aux = edge_predicate_pool.allocate ();
1597 *(predicate *) e->aux = false;
1598 }
1599
1600 e = gimple_switch_edge (cfun, last, 0);
1601 /* Set BOUND_COUNT to maximum count to bypass computing predicate for
1602 default case if its target basic block is in convergence point of all
1603 switch cases, which can be determined by checking whether it
1604 post-dominates the switch statement. */
1605 if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
1606 bound_count = INT_MAX;
1607
1608 n = gimple_switch_num_labels (last);
1609 for (case_idx = 1; case_idx < n; ++case_idx)
1610 {
1611 tree cl = gimple_switch_label (last, case_idx);
1612 tree min = CASE_LOW (cl);
1613 tree max = CASE_HIGH (cl);
1614 predicate p;
1615
1616 e = gimple_switch_edge (cfun, last, case_idx);
1617
1618 /* The case value might not have same type as switch expression,
1619 extend the value based on the expression type. */
1620 if (TREE_TYPE (min) != type)
1621 min = wide_int_to_tree (type, wi::to_wide (min));
1622
1623 if (!max)
1624 max = min;
1625 else if (TREE_TYPE (max) != type)
1626 max = wide_int_to_tree (type, wi::to_wide (max));
1627
1628 /* The case's target basic block is in convergence point of all switch
1629 cases, its predicate should be at least as that of the switch
1630 statement. */
1631 if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
1632 p = true;
1633 else if (min == max)
1634 p = add_condition (summary, params_summary, index, param_type,
1635 &aggpos, EQ_EXPR, min, param_ops);
1636 else
1637 {
1638 predicate p1, p2;
1639 p1 = add_condition (summary, params_summary, index, param_type,
1640 &aggpos, GE_EXPR, min, param_ops);
1641 p2 = add_condition (summary, params_summary,index, param_type,
1642 &aggpos, LE_EXPR, max, param_ops);
1643 p = p1 & p2;
1644 }
1645 *(class predicate *) e->aux
1646 = p.or_with (summary->conds, *(class predicate *) e->aux);
1647
1648 /* If there are too many disjoint case ranges, predicate for default
1649 case might become too complicated. So add a limit here. */
1650 if (bound_count > bound_limit)
1651 continue;
1652
1653 bool new_range = true;
1654
1655 if (!ranges.is_empty ())
1656 {
1657 wide_int curr_wmin = wi::to_wide (min);
1658 wide_int last_wmax = wi::to_wide (ranges.last ().second);
1659
1660 /* Merge case ranges if they are continuous. */
1661 if (curr_wmin == last_wmax + 1)
1662 new_range = false;
1663 else if (vr_type == VR_ANTI_RANGE)
1664 {
1665 /* If two disjoint case ranges can be connected by anti-range
1666 of switch index, combine them to one range. */
1667 if (wi::lt_p (vr_wmax, curr_wmin - 1, TYPE_SIGN (type)))
1668 vr_type = VR_UNDEFINED;
1669 else if (wi::le_p (vr_wmin, last_wmax + 1, TYPE_SIGN (type)))
1670 new_range = false;
1671 }
1672 }
1673
1674 /* Create/extend a case range. And we count endpoints of range set,
1675 this number nearly equals to number of conditions that we will create
1676 for predicate of default case. */
1677 if (new_range)
1678 {
1679 bound_count += (min == max) ? 1 : 2;
1680 ranges.safe_push (std::make_pair (min, max));
1681 }
1682 else
1683 {
1684 bound_count += (ranges.last ().first == ranges.last ().second);
1685 ranges.last ().second = max;
1686 }
1687 }
1688
1689 e = gimple_switch_edge (cfun, last, 0);
1690 if (bound_count > bound_limit)
1691 {
1692 *(class predicate *) e->aux = true;
1693 vec_free (param_ops);
1694 return;
1695 }
1696
1697 predicate p_seg = true;
1698 predicate p_all = false;
1699
1700 if (vr_type != VR_RANGE)
1701 {
1702 vr_wmin = wi::to_wide (TYPE_MIN_VALUE (type));
1703 vr_wmax = wi::to_wide (TYPE_MAX_VALUE (type));
1704 }
1705
1706 /* Construct predicate to represent default range set that is negation of
1707 all case ranges. Case range is classified as containing single/non-single
1708 values. Suppose a piece of case ranges in the following.
1709
1710 [D1...D2] [S1] ... [Sn] [D3...D4]
1711
1712 To represent default case's range sets between two non-single value
1713 case ranges (From D2 to D3), we construct predicate as:
1714
1715 D2 < x < D3 && x != S1 && ... && x != Sn
1716 */
1717 for (size_t i = 0; i < ranges.length (); i++)
1718 {
1719 tree min = ranges[i].first;
1720 tree max = ranges[i].second;
1721
1722 if (min == max)
1723 p_seg &= add_condition (summary, params_summary, index,
1724 param_type, &aggpos, NE_EXPR,
1725 min, param_ops);
1726 else
1727 {
1728 /* Do not create sub-predicate for range that is beyond low bound
1729 of switch index. */
1730 if (wi::lt_p (vr_wmin, wi::to_wide (min), TYPE_SIGN (type)))
1731 {
1732 p_seg &= add_condition (summary, params_summary, index,
1733 param_type, &aggpos,
1734 LT_EXPR, min, param_ops);
1735 p_all = p_all.or_with (summary->conds, p_seg);
1736 }
1737
1738 /* Do not create sub-predicate for range that is beyond up bound
1739 of switch index. */
1740 if (wi::le_p (vr_wmax, wi::to_wide (max), TYPE_SIGN (type)))
1741 {
1742 p_seg = false;
1743 break;
1744 }
1745
1746 p_seg = add_condition (summary, params_summary, index,
1747 param_type, &aggpos, GT_EXPR,
1748 max, param_ops);
1749 }
1750 }
1751
1752 p_all = p_all.or_with (summary->conds, p_seg);
1753 *(class predicate *) e->aux
1754 = p_all.or_with (summary->conds, *(class predicate *) e->aux);
1755
1756 vec_free (param_ops);
1757 }
1758
1759
1760 /* For each BB in NODE attach to its AUX pointer predicate under
1761 which it is executable. */
1762
1763 static void
compute_bb_predicates(struct ipa_func_body_info * fbi,struct cgraph_node * node,class ipa_fn_summary * summary,class ipa_node_params * params_summary)1764 compute_bb_predicates (struct ipa_func_body_info *fbi,
1765 struct cgraph_node *node,
1766 class ipa_fn_summary *summary,
1767 class ipa_node_params *params_summary)
1768 {
1769 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
1770 bool done = false;
1771 basic_block bb;
1772
1773 FOR_EACH_BB_FN (bb, my_function)
1774 {
1775 set_cond_stmt_execution_predicate (fbi, summary, params_summary, bb);
1776 set_switch_stmt_execution_predicate (fbi, summary, params_summary, bb);
1777 }
1778
1779 /* Entry block is always executable. */
1780 ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux
1781 = edge_predicate_pool.allocate ();
1782 *(predicate *) ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux = true;
1783
1784 /* A simple dataflow propagation of predicates forward in the CFG.
1785 TODO: work in reverse postorder. */
1786 while (!done)
1787 {
1788 done = true;
1789 FOR_EACH_BB_FN (bb, my_function)
1790 {
1791 predicate p = false;
1792 edge e;
1793 edge_iterator ei;
1794 FOR_EACH_EDGE (e, ei, bb->preds)
1795 {
1796 if (e->src->aux)
1797 {
1798 predicate this_bb_predicate
1799 = *(predicate *) e->src->aux;
1800 if (e->aux)
1801 this_bb_predicate &= (*(class predicate *) e->aux);
1802 p = p.or_with (summary->conds, this_bb_predicate);
1803 if (p == true)
1804 break;
1805 }
1806 }
1807 if (p != false)
1808 {
1809 basic_block pdom_bb;
1810
1811 if (!bb->aux)
1812 {
1813 done = false;
1814 bb->aux = edge_predicate_pool.allocate ();
1815 *((predicate *) bb->aux) = p;
1816 }
1817 else if (p != *(predicate *) bb->aux)
1818 {
1819 /* This OR operation is needed to ensure monotonous data flow
1820 in the case we hit the limit on number of clauses and the
1821 and/or operations above give approximate answers. */
1822 p = p.or_with (summary->conds, *(predicate *)bb->aux);
1823 if (p != *(predicate *) bb->aux)
1824 {
1825 done = false;
1826 *((predicate *) bb->aux) = p;
1827 }
1828 }
1829
1830 /* For switch/if statement, we can OR-combine predicates of all
1831 its cases/branches to get predicate for basic block in their
1832 convergence point, but sometimes this will generate very
1833 complicated predicate. Actually, we can get simplified
1834 predicate in another way by using the fact that predicate
1835 for a basic block must also hold true for its post dominators.
1836 To be specific, basic block in convergence point of
1837 conditional statement should include predicate of the
1838 statement. */
1839 pdom_bb = get_immediate_dominator (CDI_POST_DOMINATORS, bb);
1840 if (pdom_bb == EXIT_BLOCK_PTR_FOR_FN (my_function) || !pdom_bb)
1841 ;
1842 else if (!pdom_bb->aux)
1843 {
1844 done = false;
1845 pdom_bb->aux = edge_predicate_pool.allocate ();
1846 *((predicate *) pdom_bb->aux) = p;
1847 }
1848 else if (p != *(predicate *) pdom_bb->aux)
1849 {
1850 p = p.or_with (summary->conds, *(predicate *)pdom_bb->aux);
1851 if (p != *(predicate *) pdom_bb->aux)
1852 {
1853 done = false;
1854 *((predicate *) pdom_bb->aux) = p;
1855 }
1856 }
1857 }
1858 }
1859 }
1860 }
1861
1862
1863 /* Return predicate specifying when the STMT might have result that is not
1864 a compile time constant. */
1865
1866 static predicate
will_be_nonconstant_expr_predicate(ipa_func_body_info * fbi,class ipa_fn_summary * summary,class ipa_node_params * params_summary,tree expr,vec<predicate> nonconstant_names)1867 will_be_nonconstant_expr_predicate (ipa_func_body_info *fbi,
1868 class ipa_fn_summary *summary,
1869 class ipa_node_params *params_summary,
1870 tree expr,
1871 vec<predicate> nonconstant_names)
1872 {
1873 tree parm;
1874 int index;
1875
1876 while (UNARY_CLASS_P (expr))
1877 expr = TREE_OPERAND (expr, 0);
1878
1879 parm = unmodified_parm (fbi, NULL, expr, NULL);
1880 if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
1881 return add_condition (summary, params_summary, index, TREE_TYPE (parm), NULL,
1882 predicate::changed, NULL_TREE);
1883 if (is_gimple_min_invariant (expr))
1884 return false;
1885 if (TREE_CODE (expr) == SSA_NAME)
1886 return nonconstant_names[SSA_NAME_VERSION (expr)];
1887 if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))
1888 {
1889 predicate p1
1890 = will_be_nonconstant_expr_predicate (fbi, summary,
1891 params_summary,
1892 TREE_OPERAND (expr, 0),
1893 nonconstant_names);
1894 if (p1 == true)
1895 return p1;
1896
1897 predicate p2
1898 = will_be_nonconstant_expr_predicate (fbi, summary,
1899 params_summary,
1900 TREE_OPERAND (expr, 1),
1901 nonconstant_names);
1902 return p1.or_with (summary->conds, p2);
1903 }
1904 else if (TREE_CODE (expr) == COND_EXPR)
1905 {
1906 predicate p1
1907 = will_be_nonconstant_expr_predicate (fbi, summary,
1908 params_summary,
1909 TREE_OPERAND (expr, 0),
1910 nonconstant_names);
1911 if (p1 == true)
1912 return p1;
1913
1914 predicate p2
1915 = will_be_nonconstant_expr_predicate (fbi, summary,
1916 params_summary,
1917 TREE_OPERAND (expr, 1),
1918 nonconstant_names);
1919 if (p2 == true)
1920 return p2;
1921 p1 = p1.or_with (summary->conds, p2);
1922 p2 = will_be_nonconstant_expr_predicate (fbi, summary,
1923 params_summary,
1924 TREE_OPERAND (expr, 2),
1925 nonconstant_names);
1926 return p2.or_with (summary->conds, p1);
1927 }
1928 else if (TREE_CODE (expr) == CALL_EXPR)
1929 return true;
1930 else
1931 {
1932 debug_tree (expr);
1933 gcc_unreachable ();
1934 }
1935 return false;
1936 }
1937
1938
1939 /* Return predicate specifying when the STMT might have result that is not
1940 a compile time constant. */
1941
1942 static predicate
will_be_nonconstant_predicate(struct ipa_func_body_info * fbi,class ipa_fn_summary * summary,class ipa_node_params * params_summary,gimple * stmt,vec<predicate> nonconstant_names)1943 will_be_nonconstant_predicate (struct ipa_func_body_info *fbi,
1944 class ipa_fn_summary *summary,
1945 class ipa_node_params *params_summary,
1946 gimple *stmt,
1947 vec<predicate> nonconstant_names)
1948 {
1949 predicate p = true;
1950 ssa_op_iter iter;
1951 tree use;
1952 tree param_type = NULL_TREE;
1953 predicate op_non_const;
1954 bool is_load;
1955 int base_index;
1956 struct agg_position_info aggpos;
1957
1958 /* What statements might be optimized away
1959 when their arguments are constant. */
1960 if (gimple_code (stmt) != GIMPLE_ASSIGN
1961 && gimple_code (stmt) != GIMPLE_COND
1962 && gimple_code (stmt) != GIMPLE_SWITCH
1963 && (gimple_code (stmt) != GIMPLE_CALL
1964 || !(gimple_call_flags (stmt) & ECF_CONST)))
1965 return p;
1966
1967 /* Stores will stay anyway. */
1968 if (gimple_store_p (stmt))
1969 return p;
1970
1971 is_load = gimple_assign_load_p (stmt);
1972
1973 /* Loads can be optimized when the value is known. */
1974 if (is_load)
1975 {
1976 tree op = gimple_assign_rhs1 (stmt);
1977 if (!decompose_param_expr (fbi, stmt, op, &base_index, ¶m_type,
1978 &aggpos))
1979 return p;
1980 }
1981 else
1982 base_index = -1;
1983
1984 /* See if we understand all operands before we start
1985 adding conditionals. */
1986 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
1987 {
1988 tree parm = unmodified_parm (fbi, stmt, use, NULL);
1989 /* For arguments we can build a condition. */
1990 if (parm && ipa_get_param_decl_index (fbi->info, parm) >= 0)
1991 continue;
1992 if (TREE_CODE (use) != SSA_NAME)
1993 return p;
1994 /* If we know when operand is constant,
1995 we still can say something useful. */
1996 if (nonconstant_names[SSA_NAME_VERSION (use)] != true)
1997 continue;
1998 return p;
1999 }
2000
2001 if (is_load)
2002 op_non_const =
2003 add_condition (summary, params_summary,
2004 base_index, param_type, &aggpos,
2005 predicate::changed, NULL_TREE);
2006 else
2007 op_non_const = false;
2008 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
2009 {
2010 tree parm = unmodified_parm (fbi, stmt, use, NULL);
2011 int index;
2012
2013 if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
2014 {
2015 if (index != base_index)
2016 p = add_condition (summary, params_summary, index,
2017 TREE_TYPE (parm), NULL,
2018 predicate::changed, NULL_TREE);
2019 else
2020 continue;
2021 }
2022 else
2023 p = nonconstant_names[SSA_NAME_VERSION (use)];
2024 op_non_const = p.or_with (summary->conds, op_non_const);
2025 }
2026 if ((gimple_code (stmt) == GIMPLE_ASSIGN || gimple_code (stmt) == GIMPLE_CALL)
2027 && gimple_op (stmt, 0)
2028 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
2029 nonconstant_names[SSA_NAME_VERSION (gimple_op (stmt, 0))]
2030 = op_non_const;
2031 return op_non_const;
2032 }
2033
2034 struct record_modified_bb_info
2035 {
2036 tree op;
2037 bitmap bb_set;
2038 gimple *stmt;
2039 };
2040
2041 /* Value is initialized in INIT_BB and used in USE_BB. We want to compute
2042 probability how often it changes between USE_BB.
2043 INIT_BB->count/USE_BB->count is an estimate, but if INIT_BB
2044 is in different loop nest, we can do better.
2045 This is all just estimate. In theory we look for minimal cut separating
2046 INIT_BB and USE_BB, but we only want to anticipate loop invariant motion
2047 anyway. */
2048
2049 static basic_block
get_minimal_bb(basic_block init_bb,basic_block use_bb)2050 get_minimal_bb (basic_block init_bb, basic_block use_bb)
2051 {
2052 class loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father);
2053 if (l && l->header->count < init_bb->count)
2054 return l->header;
2055 return init_bb;
2056 }
2057
2058 /* Callback of walk_aliased_vdefs. Records basic blocks where the value may be
2059 set except for info->stmt. */
2060
2061 static bool
record_modified(ao_ref * ao ATTRIBUTE_UNUSED,tree vdef,void * data)2062 record_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef, void *data)
2063 {
2064 struct record_modified_bb_info *info =
2065 (struct record_modified_bb_info *) data;
2066 if (SSA_NAME_DEF_STMT (vdef) == info->stmt)
2067 return false;
2068 if (gimple_clobber_p (SSA_NAME_DEF_STMT (vdef)))
2069 return false;
2070 bitmap_set_bit (info->bb_set,
2071 SSA_NAME_IS_DEFAULT_DEF (vdef)
2072 ? ENTRY_BLOCK_PTR_FOR_FN (cfun)->index
2073 : get_minimal_bb
2074 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
2075 gimple_bb (info->stmt))->index);
2076 if (dump_file)
2077 {
2078 fprintf (dump_file, " Param ");
2079 print_generic_expr (dump_file, info->op, TDF_SLIM);
2080 fprintf (dump_file, " changed at bb %i, minimal: %i stmt: ",
2081 gimple_bb (SSA_NAME_DEF_STMT (vdef))->index,
2082 get_minimal_bb
2083 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
2084 gimple_bb (info->stmt))->index);
2085 print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (vdef), 0);
2086 }
2087 return false;
2088 }
2089
2090 /* Return probability (based on REG_BR_PROB_BASE) that I-th parameter of STMT
2091 will change since last invocation of STMT.
2092
2093 Value 0 is reserved for compile time invariants.
2094 For common parameters it is REG_BR_PROB_BASE. For loop invariants it
2095 ought to be REG_BR_PROB_BASE / estimated_iters. */
2096
2097 static int
param_change_prob(ipa_func_body_info * fbi,gimple * stmt,int i)2098 param_change_prob (ipa_func_body_info *fbi, gimple *stmt, int i)
2099 {
2100 tree op = gimple_call_arg (stmt, i);
2101 basic_block bb = gimple_bb (stmt);
2102
2103 if (TREE_CODE (op) == WITH_SIZE_EXPR)
2104 op = TREE_OPERAND (op, 0);
2105
2106 tree base = get_base_address (op);
2107
2108 /* Global invariants never change. */
2109 if (is_gimple_min_invariant (base))
2110 return 0;
2111
2112 /* We would have to do non-trivial analysis to really work out what
2113 is the probability of value to change (i.e. when init statement
2114 is in a sibling loop of the call).
2115
2116 We do an conservative estimate: when call is executed N times more often
2117 than the statement defining value, we take the frequency 1/N. */
2118 if (TREE_CODE (base) == SSA_NAME)
2119 {
2120 profile_count init_count;
2121
2122 if (!bb->count.nonzero_p ())
2123 return REG_BR_PROB_BASE;
2124
2125 if (SSA_NAME_IS_DEFAULT_DEF (base))
2126 init_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
2127 else
2128 init_count = get_minimal_bb
2129 (gimple_bb (SSA_NAME_DEF_STMT (base)),
2130 gimple_bb (stmt))->count;
2131
2132 if (init_count < bb->count)
2133 return MAX ((init_count.to_sreal_scale (bb->count)
2134 * REG_BR_PROB_BASE).to_int (), 1);
2135 return REG_BR_PROB_BASE;
2136 }
2137 else
2138 {
2139 ao_ref refd;
2140 profile_count max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
2141 struct record_modified_bb_info info;
2142 tree init = ctor_for_folding (base);
2143
2144 if (init != error_mark_node)
2145 return 0;
2146 if (!bb->count.nonzero_p ())
2147 return REG_BR_PROB_BASE;
2148 if (dump_file)
2149 {
2150 fprintf (dump_file, " Analyzing param change probability of ");
2151 print_generic_expr (dump_file, op, TDF_SLIM);
2152 fprintf (dump_file, "\n");
2153 }
2154 ao_ref_init (&refd, op);
2155 info.op = op;
2156 info.stmt = stmt;
2157 info.bb_set = BITMAP_ALLOC (NULL);
2158 int walked
2159 = walk_aliased_vdefs (&refd, gimple_vuse (stmt), record_modified, &info,
2160 NULL, NULL, fbi->aa_walk_budget);
2161 if (walked < 0 || bitmap_bit_p (info.bb_set, bb->index))
2162 {
2163 if (dump_file)
2164 {
2165 if (walked < 0)
2166 fprintf (dump_file, " Ran out of AA walking budget.\n");
2167 else
2168 fprintf (dump_file, " Set in same BB as used.\n");
2169 }
2170 BITMAP_FREE (info.bb_set);
2171 return REG_BR_PROB_BASE;
2172 }
2173
2174 bitmap_iterator bi;
2175 unsigned index;
2176 /* Lookup the most frequent update of the value and believe that
2177 it dominates all the other; precise analysis here is difficult. */
2178 EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi)
2179 max = max.max (BASIC_BLOCK_FOR_FN (cfun, index)->count);
2180 if (dump_file)
2181 {
2182 fprintf (dump_file, " Set with count ");
2183 max.dump (dump_file);
2184 fprintf (dump_file, " and used with count ");
2185 bb->count.dump (dump_file);
2186 fprintf (dump_file, " freq %f\n",
2187 max.to_sreal_scale (bb->count).to_double ());
2188 }
2189
2190 BITMAP_FREE (info.bb_set);
2191 if (max < bb->count)
2192 return MAX ((max.to_sreal_scale (bb->count)
2193 * REG_BR_PROB_BASE).to_int (), 1);
2194 return REG_BR_PROB_BASE;
2195 }
2196 }
2197
2198 /* Find whether a basic block BB is the final block of a (half) diamond CFG
2199 sub-graph and if the predicate the condition depends on is known. If so,
2200 return true and store the pointer the predicate in *P. */
2201
2202 static bool
phi_result_unknown_predicate(ipa_func_body_info * fbi,ipa_fn_summary * summary,class ipa_node_params * params_summary,basic_block bb,predicate * p,vec<predicate> nonconstant_names)2203 phi_result_unknown_predicate (ipa_func_body_info *fbi,
2204 ipa_fn_summary *summary,
2205 class ipa_node_params *params_summary,
2206 basic_block bb,
2207 predicate *p,
2208 vec<predicate> nonconstant_names)
2209 {
2210 edge e;
2211 edge_iterator ei;
2212 basic_block first_bb = NULL;
2213 gimple *stmt;
2214
2215 if (single_pred_p (bb))
2216 {
2217 *p = false;
2218 return true;
2219 }
2220
2221 FOR_EACH_EDGE (e, ei, bb->preds)
2222 {
2223 if (single_succ_p (e->src))
2224 {
2225 if (!single_pred_p (e->src))
2226 return false;
2227 if (!first_bb)
2228 first_bb = single_pred (e->src);
2229 else if (single_pred (e->src) != first_bb)
2230 return false;
2231 }
2232 else
2233 {
2234 if (!first_bb)
2235 first_bb = e->src;
2236 else if (e->src != first_bb)
2237 return false;
2238 }
2239 }
2240
2241 if (!first_bb)
2242 return false;
2243
2244 stmt = last_stmt (first_bb);
2245 if (!stmt
2246 || gimple_code (stmt) != GIMPLE_COND
2247 || !is_gimple_ip_invariant (gimple_cond_rhs (stmt)))
2248 return false;
2249
2250 *p = will_be_nonconstant_expr_predicate (fbi, summary, params_summary,
2251 gimple_cond_lhs (stmt),
2252 nonconstant_names);
2253 if (*p == true)
2254 return false;
2255 else
2256 return true;
2257 }
2258
2259 /* Given a PHI statement in a function described by inline properties SUMMARY
2260 and *P being the predicate describing whether the selected PHI argument is
2261 known, store a predicate for the result of the PHI statement into
2262 NONCONSTANT_NAMES, if possible. */
2263
2264 static void
predicate_for_phi_result(class ipa_fn_summary * summary,gphi * phi,predicate * p,vec<predicate> nonconstant_names)2265 predicate_for_phi_result (class ipa_fn_summary *summary, gphi *phi,
2266 predicate *p,
2267 vec<predicate> nonconstant_names)
2268 {
2269 unsigned i;
2270
2271 for (i = 0; i < gimple_phi_num_args (phi); i++)
2272 {
2273 tree arg = gimple_phi_arg (phi, i)->def;
2274 if (!is_gimple_min_invariant (arg))
2275 {
2276 gcc_assert (TREE_CODE (arg) == SSA_NAME);
2277 *p = p->or_with (summary->conds,
2278 nonconstant_names[SSA_NAME_VERSION (arg)]);
2279 if (*p == true)
2280 return;
2281 }
2282 }
2283
2284 if (dump_file && (dump_flags & TDF_DETAILS))
2285 {
2286 fprintf (dump_file, "\t\tphi predicate: ");
2287 p->dump (dump_file, summary->conds);
2288 }
2289 nonconstant_names[SSA_NAME_VERSION (gimple_phi_result (phi))] = *p;
2290 }
2291
2292 /* For a typical usage of __builtin_expect (a<b, 1), we
2293 may introduce an extra relation stmt:
2294 With the builtin, we have
2295 t1 = a <= b;
2296 t2 = (long int) t1;
2297 t3 = __builtin_expect (t2, 1);
2298 if (t3 != 0)
2299 goto ...
2300 Without the builtin, we have
2301 if (a<=b)
2302 goto...
2303 This affects the size/time estimation and may have
2304 an impact on the earlier inlining.
2305 Here find this pattern and fix it up later. */
2306
2307 static gimple *
find_foldable_builtin_expect(basic_block bb)2308 find_foldable_builtin_expect (basic_block bb)
2309 {
2310 gimple_stmt_iterator bsi;
2311
2312 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2313 {
2314 gimple *stmt = gsi_stmt (bsi);
2315 if (gimple_call_builtin_p (stmt, BUILT_IN_EXPECT)
2316 || gimple_call_builtin_p (stmt, BUILT_IN_EXPECT_WITH_PROBABILITY)
2317 || gimple_call_internal_p (stmt, IFN_BUILTIN_EXPECT))
2318 {
2319 tree var = gimple_call_lhs (stmt);
2320 tree arg = gimple_call_arg (stmt, 0);
2321 use_operand_p use_p;
2322 gimple *use_stmt;
2323 bool match = false;
2324 bool done = false;
2325
2326 if (!var || !arg)
2327 continue;
2328 gcc_assert (TREE_CODE (var) == SSA_NAME);
2329
2330 while (TREE_CODE (arg) == SSA_NAME)
2331 {
2332 gimple *stmt_tmp = SSA_NAME_DEF_STMT (arg);
2333 if (!is_gimple_assign (stmt_tmp))
2334 break;
2335 switch (gimple_assign_rhs_code (stmt_tmp))
2336 {
2337 case LT_EXPR:
2338 case LE_EXPR:
2339 case GT_EXPR:
2340 case GE_EXPR:
2341 case EQ_EXPR:
2342 case NE_EXPR:
2343 match = true;
2344 done = true;
2345 break;
2346 CASE_CONVERT:
2347 break;
2348 default:
2349 done = true;
2350 break;
2351 }
2352 if (done)
2353 break;
2354 arg = gimple_assign_rhs1 (stmt_tmp);
2355 }
2356
2357 if (match && single_imm_use (var, &use_p, &use_stmt)
2358 && gimple_code (use_stmt) == GIMPLE_COND)
2359 return use_stmt;
2360 }
2361 }
2362 return NULL;
2363 }
2364
2365 /* Return true when the basic blocks contains only clobbers followed by RESX.
2366 Such BBs are kept around to make removal of dead stores possible with
2367 presence of EH and will be optimized out by optimize_clobbers later in the
2368 game.
2369
2370 NEED_EH is used to recurse in case the clobber has non-EH predecessors
2371 that can be clobber only, too.. When it is false, the RESX is not necessary
2372 on the end of basic block. */
2373
2374 static bool
2375 clobber_only_eh_bb_p (basic_block bb, bool need_eh = true)
2376 {
2377 gimple_stmt_iterator gsi = gsi_last_bb (bb);
2378 edge_iterator ei;
2379 edge e;
2380
2381 if (need_eh)
2382 {
2383 if (gsi_end_p (gsi))
2384 return false;
2385 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_RESX)
2386 return false;
2387 gsi_prev (&gsi);
2388 }
2389 else if (!single_succ_p (bb))
2390 return false;
2391
2392 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2393 {
2394 gimple *stmt = gsi_stmt (gsi);
2395 if (is_gimple_debug (stmt))
2396 continue;
2397 if (gimple_clobber_p (stmt))
2398 continue;
2399 if (gimple_code (stmt) == GIMPLE_LABEL)
2400 break;
2401 return false;
2402 }
2403
2404 /* See if all predecessors are either throws or clobber only BBs. */
2405 FOR_EACH_EDGE (e, ei, bb->preds)
2406 if (!(e->flags & EDGE_EH)
2407 && !clobber_only_eh_bb_p (e->src, false))
2408 return false;
2409
2410 return true;
2411 }
2412
2413 /* Return true if STMT compute a floating point expression that may be affected
2414 by -ffast-math and similar flags. */
2415
2416 static bool
fp_expression_p(gimple * stmt)2417 fp_expression_p (gimple *stmt)
2418 {
2419 ssa_op_iter i;
2420 tree op;
2421
2422 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF|SSA_OP_USE)
2423 if (FLOAT_TYPE_P (TREE_TYPE (op)))
2424 return true;
2425 return false;
2426 }
2427
2428 /* Analyze function body for NODE.
2429 EARLY indicates run from early optimization pipeline. */
2430
2431 static void
analyze_function_body(struct cgraph_node * node,bool early)2432 analyze_function_body (struct cgraph_node *node, bool early)
2433 {
2434 sreal time = opt_for_fn (node->decl, param_uninlined_function_time);
2435 /* Estimate static overhead for function prologue/epilogue and alignment. */
2436 int size = opt_for_fn (node->decl, param_uninlined_function_insns);
2437 /* Benefits are scaled by probability of elimination that is in range
2438 <0,2>. */
2439 basic_block bb;
2440 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
2441 sreal freq;
2442 class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
2443 class ipa_node_params *params_summary = early ? NULL : IPA_NODE_REF (node);
2444 predicate bb_predicate;
2445 struct ipa_func_body_info fbi;
2446 vec<predicate> nonconstant_names = vNULL;
2447 int nblocks, n;
2448 int *order;
2449 gimple *fix_builtin_expect_stmt;
2450
2451 gcc_assert (my_function && my_function->cfg);
2452 gcc_assert (cfun == my_function);
2453
2454 memset(&fbi, 0, sizeof(fbi));
2455 vec_free (info->conds);
2456 info->conds = NULL;
2457 vec_free (info->size_time_table);
2458 info->size_time_table = NULL;
2459
2460 /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
2461 so we can produce proper inline hints.
2462
2463 When optimizing and analyzing for early inliner, initialize node params
2464 so we can produce correct BB predicates. */
2465
2466 if (opt_for_fn (node->decl, optimize))
2467 {
2468 calculate_dominance_info (CDI_DOMINATORS);
2469 calculate_dominance_info (CDI_POST_DOMINATORS);
2470 if (!early)
2471 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
2472 else
2473 {
2474 ipa_check_create_node_params ();
2475 ipa_initialize_node_params (node);
2476 }
2477
2478 if (ipa_node_params_sum)
2479 {
2480 fbi.node = node;
2481 fbi.info = IPA_NODE_REF (node);
2482 fbi.bb_infos = vNULL;
2483 fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun));
2484 fbi.param_count = count_formal_params (node->decl);
2485 fbi.aa_walk_budget = opt_for_fn (node->decl, param_ipa_max_aa_steps);
2486
2487 nonconstant_names.safe_grow_cleared
2488 (SSANAMES (my_function)->length ());
2489 }
2490 }
2491
2492 if (dump_file)
2493 fprintf (dump_file, "\nAnalyzing function body size: %s\n",
2494 node->dump_name ());
2495
2496 /* When we run into maximal number of entries, we assign everything to the
2497 constant truth case. Be sure to have it in list. */
2498 bb_predicate = true;
2499 info->account_size_time (0, 0, bb_predicate, bb_predicate);
2500
2501 bb_predicate = predicate::not_inlined ();
2502 info->account_size_time (opt_for_fn (node->decl,
2503 param_uninlined_function_insns)
2504 * ipa_fn_summary::size_scale,
2505 opt_for_fn (node->decl,
2506 param_uninlined_function_time),
2507 bb_predicate,
2508 bb_predicate);
2509
2510 if (fbi.info)
2511 compute_bb_predicates (&fbi, node, info, params_summary);
2512 order = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
2513 nblocks = pre_and_rev_post_order_compute (NULL, order, false);
2514 for (n = 0; n < nblocks; n++)
2515 {
2516 bb = BASIC_BLOCK_FOR_FN (cfun, order[n]);
2517 freq = bb->count.to_sreal_scale (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count);
2518 if (clobber_only_eh_bb_p (bb))
2519 {
2520 if (dump_file && (dump_flags & TDF_DETAILS))
2521 fprintf (dump_file, "\n Ignoring BB %i;"
2522 " it will be optimized away by cleanup_clobbers\n",
2523 bb->index);
2524 continue;
2525 }
2526
2527 /* TODO: Obviously predicates can be propagated down across CFG. */
2528 if (fbi.info)
2529 {
2530 if (bb->aux)
2531 bb_predicate = *(predicate *) bb->aux;
2532 else
2533 bb_predicate = false;
2534 }
2535 else
2536 bb_predicate = true;
2537
2538 if (dump_file && (dump_flags & TDF_DETAILS))
2539 {
2540 fprintf (dump_file, "\n BB %i predicate:", bb->index);
2541 bb_predicate.dump (dump_file, info->conds);
2542 }
2543
2544 if (fbi.info && nonconstant_names.exists ())
2545 {
2546 predicate phi_predicate;
2547 bool first_phi = true;
2548
2549 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
2550 gsi_next (&bsi))
2551 {
2552 if (first_phi
2553 && !phi_result_unknown_predicate (&fbi, info,
2554 params_summary,
2555 bb,
2556 &phi_predicate,
2557 nonconstant_names))
2558 break;
2559 first_phi = false;
2560 if (dump_file && (dump_flags & TDF_DETAILS))
2561 {
2562 fprintf (dump_file, " ");
2563 print_gimple_stmt (dump_file, gsi_stmt (bsi), 0);
2564 }
2565 predicate_for_phi_result (info, bsi.phi (), &phi_predicate,
2566 nonconstant_names);
2567 }
2568 }
2569
2570 fix_builtin_expect_stmt = find_foldable_builtin_expect (bb);
2571
2572 for (gimple_stmt_iterator bsi = gsi_start_nondebug_bb (bb);
2573 !gsi_end_p (bsi); gsi_next_nondebug (&bsi))
2574 {
2575 gimple *stmt = gsi_stmt (bsi);
2576 int this_size = estimate_num_insns (stmt, &eni_size_weights);
2577 int this_time = estimate_num_insns (stmt, &eni_time_weights);
2578 int prob;
2579 predicate will_be_nonconstant;
2580
2581 /* This relation stmt should be folded after we remove
2582 __builtin_expect call. Adjust the cost here. */
2583 if (stmt == fix_builtin_expect_stmt)
2584 {
2585 this_size--;
2586 this_time--;
2587 }
2588
2589 if (dump_file && (dump_flags & TDF_DETAILS))
2590 {
2591 fprintf (dump_file, " ");
2592 print_gimple_stmt (dump_file, stmt, 0);
2593 fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n",
2594 freq.to_double (), this_size,
2595 this_time);
2596 }
2597
2598 if (is_gimple_call (stmt)
2599 && !gimple_call_internal_p (stmt))
2600 {
2601 struct cgraph_edge *edge = node->get_edge (stmt);
2602 ipa_call_summary *es = ipa_call_summaries->get_create (edge);
2603
2604 /* Special case: results of BUILT_IN_CONSTANT_P will be always
2605 resolved as constant. We however don't want to optimize
2606 out the cgraph edges. */
2607 if (nonconstant_names.exists ()
2608 && gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P)
2609 && gimple_call_lhs (stmt)
2610 && TREE_CODE (gimple_call_lhs (stmt)) == SSA_NAME)
2611 {
2612 predicate false_p = false;
2613 nonconstant_names[SSA_NAME_VERSION (gimple_call_lhs (stmt))]
2614 = false_p;
2615 }
2616 if (ipa_node_params_sum)
2617 {
2618 int count = gimple_call_num_args (stmt);
2619 int i;
2620
2621 if (count)
2622 es->param.safe_grow_cleared (count);
2623 for (i = 0; i < count; i++)
2624 {
2625 int prob = param_change_prob (&fbi, stmt, i);
2626 gcc_assert (prob >= 0 && prob <= REG_BR_PROB_BASE);
2627 es->param[i].change_prob = prob;
2628 }
2629 }
2630
2631 es->call_stmt_size = this_size;
2632 es->call_stmt_time = this_time;
2633 es->loop_depth = bb_loop_depth (bb);
2634 edge_set_predicate (edge, &bb_predicate);
2635 if (edge->speculative)
2636 {
2637 cgraph_edge *indirect
2638 = edge->speculative_call_indirect_edge ();
2639 ipa_call_summary *es2
2640 = ipa_call_summaries->get_create (indirect);
2641 ipa_call_summaries->duplicate (edge, indirect,
2642 es, es2);
2643
2644 /* Edge is the first direct call.
2645 create and duplicate call summaries for multiple
2646 speculative call targets. */
2647 for (cgraph_edge *direct
2648 = edge->next_speculative_call_target ();
2649 direct;
2650 direct = direct->next_speculative_call_target ())
2651 {
2652 ipa_call_summary *es3
2653 = ipa_call_summaries->get_create (direct);
2654 ipa_call_summaries->duplicate (edge, direct,
2655 es, es3);
2656 }
2657 }
2658 }
2659
2660 /* TODO: When conditional jump or switch is known to be constant, but
2661 we did not translate it into the predicates, we really can account
2662 just maximum of the possible paths. */
2663 if (fbi.info)
2664 will_be_nonconstant
2665 = will_be_nonconstant_predicate (&fbi, info, params_summary,
2666 stmt, nonconstant_names);
2667 else
2668 will_be_nonconstant = true;
2669 if (this_time || this_size)
2670 {
2671 sreal final_time = (sreal)this_time * freq;
2672
2673 prob = eliminated_by_inlining_prob (&fbi, stmt);
2674 if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS))
2675 fprintf (dump_file,
2676 "\t\t50%% will be eliminated by inlining\n");
2677 if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS))
2678 fprintf (dump_file, "\t\tWill be eliminated by inlining\n");
2679
2680 class predicate p = bb_predicate & will_be_nonconstant;
2681
2682 /* We can ignore statement when we proved it is never going
2683 to happen, but we cannot do that for call statements
2684 because edges are accounted specially. */
2685
2686 if (*(is_gimple_call (stmt) ? &bb_predicate : &p) != false)
2687 {
2688 time += final_time;
2689 size += this_size;
2690 }
2691
2692 /* We account everything but the calls. Calls have their own
2693 size/time info attached to cgraph edges. This is necessary
2694 in order to make the cost disappear after inlining. */
2695 if (!is_gimple_call (stmt))
2696 {
2697 if (prob)
2698 {
2699 predicate ip = bb_predicate & predicate::not_inlined ();
2700 info->account_size_time (this_size * prob,
2701 (final_time * prob) / 2, ip,
2702 p);
2703 }
2704 if (prob != 2)
2705 info->account_size_time (this_size * (2 - prob),
2706 (final_time * (2 - prob) / 2),
2707 bb_predicate,
2708 p);
2709 }
2710
2711 if (!info->fp_expressions && fp_expression_p (stmt))
2712 {
2713 info->fp_expressions = true;
2714 if (dump_file)
2715 fprintf (dump_file, " fp_expression set\n");
2716 }
2717 }
2718
2719 /* Account cost of address calculations in the statements. */
2720 for (unsigned int i = 0; i < gimple_num_ops (stmt); i++)
2721 {
2722 for (tree op = gimple_op (stmt, i);
2723 op && handled_component_p (op);
2724 op = TREE_OPERAND (op, 0))
2725 if ((TREE_CODE (op) == ARRAY_REF
2726 || TREE_CODE (op) == ARRAY_RANGE_REF)
2727 && TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME)
2728 {
2729 predicate p = bb_predicate;
2730 if (fbi.info)
2731 p = p & will_be_nonconstant_expr_predicate
2732 (&fbi, info, params_summary,
2733 TREE_OPERAND (op, 1),
2734 nonconstant_names);
2735 if (p != false)
2736 {
2737 time += freq;
2738 size += 1;
2739 if (dump_file)
2740 fprintf (dump_file,
2741 "\t\tAccounting address calculation.\n");
2742 info->account_size_time (ipa_fn_summary::size_scale,
2743 freq,
2744 bb_predicate,
2745 p);
2746 }
2747 }
2748 }
2749
2750 }
2751 }
2752 free (order);
2753
2754 if (nonconstant_names.exists () && !early)
2755 {
2756 class loop *loop;
2757 predicate loop_iterations = true;
2758 predicate loop_stride = true;
2759
2760 if (dump_file && (dump_flags & TDF_DETAILS))
2761 flow_loops_dump (dump_file, NULL, 0);
2762 scev_initialize ();
2763 FOR_EACH_LOOP (loop, 0)
2764 {
2765 vec<edge> exits;
2766 edge ex;
2767 unsigned int j;
2768 class tree_niter_desc niter_desc;
2769 if (loop->header->aux)
2770 bb_predicate = *(predicate *) loop->header->aux;
2771 else
2772 bb_predicate = false;
2773
2774 exits = get_loop_exit_edges (loop);
2775 FOR_EACH_VEC_ELT (exits, j, ex)
2776 if (number_of_iterations_exit (loop, ex, &niter_desc, false)
2777 && !is_gimple_min_invariant (niter_desc.niter))
2778 {
2779 predicate will_be_nonconstant
2780 = will_be_nonconstant_expr_predicate (&fbi, info,
2781 params_summary,
2782 niter_desc.niter,
2783 nonconstant_names);
2784 if (will_be_nonconstant != true)
2785 will_be_nonconstant = bb_predicate & will_be_nonconstant;
2786 if (will_be_nonconstant != true
2787 && will_be_nonconstant != false)
2788 /* This is slightly inprecise. We may want to represent each
2789 loop with independent predicate. */
2790 loop_iterations &= will_be_nonconstant;
2791 }
2792 exits.release ();
2793 }
2794
2795 /* To avoid quadratic behavior we analyze stride predicates only
2796 with respect to the containing loop. Thus we simply iterate
2797 over all defs in the outermost loop body. */
2798 for (loop = loops_for_fn (cfun)->tree_root->inner;
2799 loop != NULL; loop = loop->next)
2800 {
2801 basic_block *body = get_loop_body (loop);
2802 for (unsigned i = 0; i < loop->num_nodes; i++)
2803 {
2804 gimple_stmt_iterator gsi;
2805 if (body[i]->aux)
2806 bb_predicate = *(predicate *) body[i]->aux;
2807 else
2808 bb_predicate = false;
2809 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);
2810 gsi_next (&gsi))
2811 {
2812 gimple *stmt = gsi_stmt (gsi);
2813
2814 if (!is_gimple_assign (stmt))
2815 continue;
2816
2817 tree def = gimple_assign_lhs (stmt);
2818 if (TREE_CODE (def) != SSA_NAME)
2819 continue;
2820
2821 affine_iv iv;
2822 if (!simple_iv (loop_containing_stmt (stmt),
2823 loop_containing_stmt (stmt),
2824 def, &iv, true)
2825 || is_gimple_min_invariant (iv.step))
2826 continue;
2827
2828 predicate will_be_nonconstant
2829 = will_be_nonconstant_expr_predicate (&fbi, info,
2830 params_summary,
2831 iv.step,
2832 nonconstant_names);
2833 if (will_be_nonconstant != true)
2834 will_be_nonconstant = bb_predicate & will_be_nonconstant;
2835 if (will_be_nonconstant != true
2836 && will_be_nonconstant != false)
2837 /* This is slightly inprecise. We may want to represent
2838 each loop with independent predicate. */
2839 loop_stride = loop_stride & will_be_nonconstant;
2840 }
2841 }
2842 free (body);
2843 }
2844 ipa_fn_summary *s = ipa_fn_summaries->get (node);
2845 set_hint_predicate (&s->loop_iterations, loop_iterations);
2846 set_hint_predicate (&s->loop_stride, loop_stride);
2847 scev_finalize ();
2848 }
2849 FOR_ALL_BB_FN (bb, my_function)
2850 {
2851 edge e;
2852 edge_iterator ei;
2853
2854 if (bb->aux)
2855 edge_predicate_pool.remove ((predicate *)bb->aux);
2856 bb->aux = NULL;
2857 FOR_EACH_EDGE (e, ei, bb->succs)
2858 {
2859 if (e->aux)
2860 edge_predicate_pool.remove ((predicate *) e->aux);
2861 e->aux = NULL;
2862 }
2863 }
2864 ipa_fn_summary *s = ipa_fn_summaries->get (node);
2865 ipa_size_summary *ss = ipa_size_summaries->get (node);
2866 s->time = time;
2867 ss->self_size = size;
2868 nonconstant_names.release ();
2869 ipa_release_body_info (&fbi);
2870 if (opt_for_fn (node->decl, optimize))
2871 {
2872 if (!early)
2873 loop_optimizer_finalize ();
2874 else if (!ipa_edge_args_sum)
2875 ipa_free_all_node_params ();
2876 free_dominance_info (CDI_DOMINATORS);
2877 free_dominance_info (CDI_POST_DOMINATORS);
2878 }
2879 if (dump_file)
2880 {
2881 fprintf (dump_file, "\n");
2882 ipa_dump_fn_summary (dump_file, node);
2883 }
2884 }
2885
2886
2887 /* Compute function summary.
2888 EARLY is true when we compute parameters during early opts. */
2889
2890 void
compute_fn_summary(struct cgraph_node * node,bool early)2891 compute_fn_summary (struct cgraph_node *node, bool early)
2892 {
2893 HOST_WIDE_INT self_stack_size;
2894 struct cgraph_edge *e;
2895
2896 gcc_assert (!node->inlined_to);
2897
2898 if (!ipa_fn_summaries)
2899 ipa_fn_summary_alloc ();
2900
2901 /* Create a new ipa_fn_summary. */
2902 ((ipa_fn_summary_t *)ipa_fn_summaries)->remove_callees (node);
2903 ipa_fn_summaries->remove (node);
2904 class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
2905 class ipa_size_summary *size_info = ipa_size_summaries->get_create (node);
2906
2907 /* Estimate the stack size for the function if we're optimizing. */
2908 self_stack_size = optimize && !node->thunk.thunk_p
2909 ? estimated_stack_frame_size (node) : 0;
2910 size_info->estimated_self_stack_size = self_stack_size;
2911 info->estimated_stack_size = self_stack_size;
2912
2913 if (node->thunk.thunk_p)
2914 {
2915 ipa_call_summary *es = ipa_call_summaries->get_create (node->callees);
2916 predicate t = true;
2917
2918 node->can_change_signature = false;
2919 es->call_stmt_size = eni_size_weights.call_cost;
2920 es->call_stmt_time = eni_time_weights.call_cost;
2921 info->account_size_time (ipa_fn_summary::size_scale
2922 * opt_for_fn (node->decl,
2923 param_uninlined_function_thunk_insns),
2924 opt_for_fn (node->decl,
2925 param_uninlined_function_thunk_time), t, t);
2926 t = predicate::not_inlined ();
2927 info->account_size_time (2 * ipa_fn_summary::size_scale, 0, t, t);
2928 ipa_update_overall_fn_summary (node);
2929 size_info->self_size = size_info->size;
2930 if (stdarg_p (TREE_TYPE (node->decl)))
2931 {
2932 info->inlinable = false;
2933 node->callees->inline_failed = CIF_VARIADIC_THUNK;
2934 }
2935 else
2936 info->inlinable = true;
2937 }
2938 else
2939 {
2940 /* Even is_gimple_min_invariant rely on current_function_decl. */
2941 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2942
2943 /* During IPA profile merging we may be called w/o virtual SSA form
2944 built. */
2945 update_ssa (TODO_update_ssa_only_virtuals);
2946
2947 /* Can this function be inlined at all? */
2948 if (!opt_for_fn (node->decl, optimize)
2949 && !lookup_attribute ("always_inline",
2950 DECL_ATTRIBUTES (node->decl)))
2951 info->inlinable = false;
2952 else
2953 info->inlinable = tree_inlinable_function_p (node->decl);
2954
2955 /* Type attributes can use parameter indices to describe them. */
2956 if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl))
2957 /* Likewise for #pragma omp declare simd functions or functions
2958 with simd attribute. */
2959 || lookup_attribute ("omp declare simd",
2960 DECL_ATTRIBUTES (node->decl)))
2961 node->can_change_signature = false;
2962 else
2963 {
2964 /* Otherwise, inlinable functions always can change signature. */
2965 if (info->inlinable)
2966 node->can_change_signature = true;
2967 else
2968 {
2969 /* Functions calling builtin_apply cannot change signature. */
2970 for (e = node->callees; e; e = e->next_callee)
2971 {
2972 tree cdecl = e->callee->decl;
2973 if (fndecl_built_in_p (cdecl, BUILT_IN_APPLY_ARGS)
2974 || fndecl_built_in_p (cdecl, BUILT_IN_VA_START))
2975 break;
2976 }
2977 node->can_change_signature = !e;
2978 }
2979 }
2980 analyze_function_body (node, early);
2981 pop_cfun ();
2982 }
2983
2984 /* Inlining characteristics are maintained by the cgraph_mark_inline. */
2985 size_info->size = size_info->self_size;
2986 info->estimated_stack_size = size_info->estimated_self_stack_size;
2987
2988 /* Code above should compute exactly the same result as
2989 ipa_update_overall_fn_summary except for case when speculative
2990 edges are present since these are accounted to size but not
2991 self_size. Do not compare time since different order the roundoff
2992 errors result in slight changes. */
2993 ipa_update_overall_fn_summary (node);
2994 if (flag_checking)
2995 {
2996 for (e = node->indirect_calls; e; e = e->next_callee)
2997 if (e->speculative)
2998 break;
2999 gcc_assert (e || size_info->size == size_info->self_size);
3000 }
3001 }
3002
3003
3004 /* Compute parameters of functions used by inliner using
3005 current_function_decl. */
3006
3007 static unsigned int
compute_fn_summary_for_current(void)3008 compute_fn_summary_for_current (void)
3009 {
3010 compute_fn_summary (cgraph_node::get (current_function_decl), true);
3011 return 0;
3012 }
3013
3014 /* Estimate benefit devirtualizing indirect edge IE, provided KNOWN_VALS,
3015 KNOWN_CONTEXTS and KNOWN_AGGS. */
3016
3017 static bool
estimate_edge_devirt_benefit(struct cgraph_edge * ie,int * size,int * time,vec<tree> known_vals,vec<ipa_polymorphic_call_context> known_contexts,vec<ipa_agg_value_set> known_aggs)3018 estimate_edge_devirt_benefit (struct cgraph_edge *ie,
3019 int *size, int *time,
3020 vec<tree> known_vals,
3021 vec<ipa_polymorphic_call_context> known_contexts,
3022 vec<ipa_agg_value_set> known_aggs)
3023 {
3024 tree target;
3025 struct cgraph_node *callee;
3026 class ipa_fn_summary *isummary;
3027 enum availability avail;
3028 bool speculative;
3029
3030 if (!known_vals.length () && !known_contexts.length ())
3031 return false;
3032 if (!opt_for_fn (ie->caller->decl, flag_indirect_inlining))
3033 return false;
3034
3035 target = ipa_get_indirect_edge_target (ie, known_vals, known_contexts,
3036 known_aggs, &speculative);
3037 if (!target || speculative)
3038 return false;
3039
3040 /* Account for difference in cost between indirect and direct calls. */
3041 *size -= (eni_size_weights.indirect_call_cost - eni_size_weights.call_cost);
3042 *time -= (eni_time_weights.indirect_call_cost - eni_time_weights.call_cost);
3043 gcc_checking_assert (*time >= 0);
3044 gcc_checking_assert (*size >= 0);
3045
3046 callee = cgraph_node::get (target);
3047 if (!callee || !callee->definition)
3048 return false;
3049 callee = callee->function_symbol (&avail);
3050 if (avail < AVAIL_AVAILABLE)
3051 return false;
3052 isummary = ipa_fn_summaries->get (callee);
3053 if (isummary == NULL)
3054 return false;
3055
3056 return isummary->inlinable;
3057 }
3058
3059 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
3060 handle edge E with probability PROB.
3061 Set HINTS if edge may be devirtualized.
3062 KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS describe context of the call
3063 site. */
3064
3065 static inline void
estimate_edge_size_and_time(struct cgraph_edge * e,int * size,int * min_size,sreal * time,vec<tree> known_vals,vec<ipa_polymorphic_call_context> known_contexts,vec<ipa_agg_value_set> known_aggs,ipa_hints * hints)3066 estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size,
3067 sreal *time,
3068 vec<tree> known_vals,
3069 vec<ipa_polymorphic_call_context> known_contexts,
3070 vec<ipa_agg_value_set> known_aggs,
3071 ipa_hints *hints)
3072 {
3073 class ipa_call_summary *es = ipa_call_summaries->get (e);
3074 int call_size = es->call_stmt_size;
3075 int call_time = es->call_stmt_time;
3076 int cur_size;
3077
3078 if (!e->callee && hints && e->maybe_hot_p ()
3079 && estimate_edge_devirt_benefit (e, &call_size, &call_time,
3080 known_vals, known_contexts, known_aggs))
3081 *hints |= INLINE_HINT_indirect_call;
3082 cur_size = call_size * ipa_fn_summary::size_scale;
3083 *size += cur_size;
3084 if (min_size)
3085 *min_size += cur_size;
3086 if (time)
3087 *time += ((sreal)call_time) * e->sreal_frequency ();
3088 }
3089
3090
3091 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
3092 calls in NODE. POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
3093 describe context of the call site.
3094
3095 Helper for estimate_calls_size_and_time which does the same but
3096 (in most cases) faster. */
3097
3098 static void
estimate_calls_size_and_time_1(struct cgraph_node * node,int * size,int * min_size,sreal * time,ipa_hints * hints,clause_t possible_truths,vec<tree> known_vals,vec<ipa_polymorphic_call_context> known_contexts,vec<ipa_agg_value_set> known_aggs)3099 estimate_calls_size_and_time_1 (struct cgraph_node *node, int *size,
3100 int *min_size, sreal *time,
3101 ipa_hints *hints,
3102 clause_t possible_truths,
3103 vec<tree> known_vals,
3104 vec<ipa_polymorphic_call_context> known_contexts,
3105 vec<ipa_agg_value_set> known_aggs)
3106 {
3107 struct cgraph_edge *e;
3108 for (e = node->callees; e; e = e->next_callee)
3109 {
3110 if (!e->inline_failed)
3111 {
3112 gcc_checking_assert (!ipa_call_summaries->get (e));
3113 estimate_calls_size_and_time_1 (e->callee, size, min_size, time,
3114 hints,
3115 possible_truths,
3116 known_vals, known_contexts,
3117 known_aggs);
3118 continue;
3119 }
3120 class ipa_call_summary *es = ipa_call_summaries->get (e);
3121
3122 /* Do not care about zero sized builtins. */
3123 if (!es->call_stmt_size)
3124 {
3125 gcc_checking_assert (!es->call_stmt_time);
3126 continue;
3127 }
3128 if (!es->predicate
3129 || es->predicate->evaluate (possible_truths))
3130 {
3131 /* Predicates of calls shall not use NOT_CHANGED codes,
3132 so we do not need to compute probabilities. */
3133 estimate_edge_size_and_time (e, size,
3134 es->predicate ? NULL : min_size,
3135 time,
3136 known_vals, known_contexts,
3137 known_aggs, hints);
3138 }
3139 }
3140 for (e = node->indirect_calls; e; e = e->next_callee)
3141 {
3142 class ipa_call_summary *es = ipa_call_summaries->get (e);
3143 if (!es->predicate
3144 || es->predicate->evaluate (possible_truths))
3145 estimate_edge_size_and_time (e, size,
3146 es->predicate ? NULL : min_size,
3147 time,
3148 known_vals, known_contexts, known_aggs,
3149 hints);
3150 }
3151 }
3152
3153 /* Populate sum->call_size_time_table for edges from NODE. */
3154
3155 static void
summarize_calls_size_and_time(struct cgraph_node * node,ipa_fn_summary * sum)3156 summarize_calls_size_and_time (struct cgraph_node *node,
3157 ipa_fn_summary *sum)
3158 {
3159 struct cgraph_edge *e;
3160 for (e = node->callees; e; e = e->next_callee)
3161 {
3162 if (!e->inline_failed)
3163 {
3164 gcc_checking_assert (!ipa_call_summaries->get (e));
3165 summarize_calls_size_and_time (e->callee, sum);
3166 continue;
3167 }
3168 int size = 0;
3169 sreal time = 0;
3170
3171 estimate_edge_size_and_time (e, &size, NULL, &time,
3172 vNULL, vNULL, vNULL, NULL);
3173
3174 struct predicate pred = true;
3175 class ipa_call_summary *es = ipa_call_summaries->get (e);
3176
3177 if (es->predicate)
3178 pred = *es->predicate;
3179 sum->account_size_time (size, time, pred, pred, true);
3180 }
3181 for (e = node->indirect_calls; e; e = e->next_callee)
3182 {
3183 int size = 0;
3184 sreal time = 0;
3185
3186 estimate_edge_size_and_time (e, &size, NULL, &time,
3187 vNULL, vNULL, vNULL, NULL);
3188 struct predicate pred = true;
3189 class ipa_call_summary *es = ipa_call_summaries->get (e);
3190
3191 if (es->predicate)
3192 pred = *es->predicate;
3193 sum->account_size_time (size, time, pred, pred, true);
3194 }
3195 }
3196
3197 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
3198 calls in NODE. POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
3199 describe context of the call site. */
3200
3201 static void
estimate_calls_size_and_time(struct cgraph_node * node,int * size,int * min_size,sreal * time,ipa_hints * hints,clause_t possible_truths,vec<tree> known_vals,vec<ipa_polymorphic_call_context> known_contexts,vec<ipa_agg_value_set> known_aggs)3202 estimate_calls_size_and_time (struct cgraph_node *node, int *size,
3203 int *min_size, sreal *time,
3204 ipa_hints *hints,
3205 clause_t possible_truths,
3206 vec<tree> known_vals,
3207 vec<ipa_polymorphic_call_context> known_contexts,
3208 vec<ipa_agg_value_set> known_aggs)
3209 {
3210 class ipa_fn_summary *sum = ipa_fn_summaries->get (node);
3211 bool use_table = true;
3212
3213 gcc_assert (node->callees || node->indirect_calls);
3214
3215 /* During early inlining we do not calculate info for very
3216 large functions and thus there is no need for producing
3217 summaries. */
3218 if (!ipa_node_params_sum)
3219 use_table = false;
3220 /* Do not calculate summaries for simple wrappers; it is waste
3221 of memory. */
3222 else if (node->callees && node->indirect_calls
3223 && node->callees->inline_failed && !node->callees->next_callee)
3224 use_table = false;
3225 /* If there is an indirect edge that may be optimized, we need
3226 to go the slow way. */
3227 else if ((known_vals.length ()
3228 || known_contexts.length ()
3229 || known_aggs.length ()) && hints)
3230 {
3231 class ipa_node_params *params_summary = IPA_NODE_REF (node);
3232 unsigned int nargs = params_summary
3233 ? ipa_get_param_count (params_summary) : 0;
3234
3235 for (unsigned int i = 0; i < nargs && use_table; i++)
3236 {
3237 if (ipa_is_param_used_by_indirect_call (params_summary, i)
3238 && ((known_vals.length () > i && known_vals[i])
3239 || (known_aggs.length () > i
3240 && known_aggs[i].items.length ())))
3241 use_table = false;
3242 else if (ipa_is_param_used_by_polymorphic_call (params_summary, i)
3243 && (known_contexts.length () > i
3244 && !known_contexts[i].useless_p ()))
3245 use_table = false;
3246 }
3247 }
3248
3249 /* Fast path is via the call size time table. */
3250 if (use_table)
3251 {
3252 /* Build summary if it is absent. */
3253 if (!sum->call_size_time_table)
3254 {
3255 predicate true_pred = true;
3256 sum->account_size_time (0, 0, true_pred, true_pred, true);
3257 summarize_calls_size_and_time (node, sum);
3258 }
3259
3260 int old_size = *size;
3261 sreal old_time = time ? *time : 0;
3262
3263 if (min_size)
3264 *min_size += (*sum->call_size_time_table)[0].size;
3265
3266 unsigned int i;
3267 size_time_entry *e;
3268
3269 /* Walk the table and account sizes and times. */
3270 for (i = 0; vec_safe_iterate (sum->call_size_time_table, i, &e);
3271 i++)
3272 if (e->exec_predicate.evaluate (possible_truths))
3273 {
3274 *size += e->size;
3275 if (time)
3276 *time += e->time;
3277 }
3278
3279 /* Be careful and see if both methods agree. */
3280 if ((flag_checking || dump_file)
3281 /* Do not try to sanity check when we know we lost some
3282 precision. */
3283 && sum->call_size_time_table->length ()
3284 < ipa_fn_summary::max_size_time_table_size)
3285 {
3286 estimate_calls_size_and_time_1 (node, &old_size, NULL, &old_time, NULL,
3287 possible_truths, known_vals,
3288 known_contexts, known_aggs);
3289 gcc_assert (*size == old_size);
3290 if (time && (*time - old_time > 1 || *time - old_time < -1)
3291 && dump_file)
3292 fprintf (dump_file, "Time mismatch in call summary %f!=%f\n",
3293 old_time.to_double (),
3294 time->to_double ());
3295 }
3296 }
3297 /* Slow path by walking all edges. */
3298 else
3299 estimate_calls_size_and_time_1 (node, size, min_size, time, hints,
3300 possible_truths, known_vals, known_contexts,
3301 known_aggs);
3302 }
3303
3304 /* Default constructor for ipa call context.
3305 Memory allocation of known_vals, known_contexts
3306 and known_aggs vectors is owned by the caller, but can
3307 be release by ipa_call_context::release.
3308
3309 inline_param_summary is owned by the caller. */
ipa_call_context(cgraph_node * node,clause_t possible_truths,clause_t nonspec_possible_truths,vec<tree> known_vals,vec<ipa_polymorphic_call_context> known_contexts,vec<ipa_agg_value_set> known_aggs,vec<inline_param_summary> inline_param_summary)3310 ipa_call_context::ipa_call_context (cgraph_node *node,
3311 clause_t possible_truths,
3312 clause_t nonspec_possible_truths,
3313 vec<tree> known_vals,
3314 vec<ipa_polymorphic_call_context>
3315 known_contexts,
3316 vec<ipa_agg_value_set> known_aggs,
3317 vec<inline_param_summary>
3318 inline_param_summary)
3319 : m_node (node), m_possible_truths (possible_truths),
3320 m_nonspec_possible_truths (nonspec_possible_truths),
3321 m_inline_param_summary (inline_param_summary),
3322 m_known_vals (known_vals),
3323 m_known_contexts (known_contexts),
3324 m_known_aggs (known_aggs)
3325 {
3326 }
3327
3328 /* Set THIS to be a duplicate of CTX. Copy all relevant info. */
3329
3330 void
duplicate_from(const ipa_call_context & ctx)3331 ipa_call_context::duplicate_from (const ipa_call_context &ctx)
3332 {
3333 m_node = ctx.m_node;
3334 m_possible_truths = ctx.m_possible_truths;
3335 m_nonspec_possible_truths = ctx.m_nonspec_possible_truths;
3336 class ipa_node_params *params_summary = IPA_NODE_REF (m_node);
3337 unsigned int nargs = params_summary
3338 ? ipa_get_param_count (params_summary) : 0;
3339
3340 m_inline_param_summary = vNULL;
3341 /* Copy the info only if there is at least one useful entry. */
3342 if (ctx.m_inline_param_summary.exists ())
3343 {
3344 unsigned int n = MIN (ctx.m_inline_param_summary.length (), nargs);
3345
3346 for (unsigned int i = 0; i < n; i++)
3347 if (ipa_is_param_used_by_ipa_predicates (params_summary, i)
3348 && !ctx.m_inline_param_summary[i].useless_p ())
3349 {
3350 m_inline_param_summary
3351 = ctx.m_inline_param_summary.copy ();
3352 break;
3353 }
3354 }
3355 m_known_vals = vNULL;
3356 if (ctx.m_known_vals.exists ())
3357 {
3358 unsigned int n = MIN (ctx.m_known_vals.length (), nargs);
3359
3360 for (unsigned int i = 0; i < n; i++)
3361 if (ipa_is_param_used_by_indirect_call (params_summary, i)
3362 && ctx.m_known_vals[i])
3363 {
3364 m_known_vals = ctx.m_known_vals.copy ();
3365 break;
3366 }
3367 }
3368
3369 m_known_contexts = vNULL;
3370 if (ctx.m_known_contexts.exists ())
3371 {
3372 unsigned int n = MIN (ctx.m_known_contexts.length (), nargs);
3373
3374 for (unsigned int i = 0; i < n; i++)
3375 if (ipa_is_param_used_by_polymorphic_call (params_summary, i)
3376 && !ctx.m_known_contexts[i].useless_p ())
3377 {
3378 m_known_contexts = ctx.m_known_contexts.copy ();
3379 break;
3380 }
3381 }
3382
3383 m_known_aggs = vNULL;
3384 if (ctx.m_known_aggs.exists ())
3385 {
3386 unsigned int n = MIN (ctx.m_known_aggs.length (), nargs);
3387
3388 for (unsigned int i = 0; i < n; i++)
3389 if (ipa_is_param_used_by_indirect_call (params_summary, i)
3390 && !ctx.m_known_aggs[i].is_empty ())
3391 {
3392 m_known_aggs = ipa_copy_agg_values (ctx.m_known_aggs);
3393 break;
3394 }
3395 }
3396 }
3397
3398 /* Release memory used by known_vals/contexts/aggs vectors.
3399 If ALL is true release also inline_param_summary.
3400 This happens when context was previously duplicated to be stored
3401 into cache. */
3402
3403 void
release(bool all)3404 ipa_call_context::release (bool all)
3405 {
3406 /* See if context is initialized at first place. */
3407 if (!m_node)
3408 return;
3409 ipa_release_agg_values (m_known_aggs, all);
3410 if (all)
3411 {
3412 m_known_vals.release ();
3413 m_known_contexts.release ();
3414 m_inline_param_summary.release ();
3415 }
3416 }
3417
3418 /* Return true if CTX describes the same call context as THIS. */
3419
3420 bool
equal_to(const ipa_call_context & ctx)3421 ipa_call_context::equal_to (const ipa_call_context &ctx)
3422 {
3423 if (m_node != ctx.m_node
3424 || m_possible_truths != ctx.m_possible_truths
3425 || m_nonspec_possible_truths != ctx.m_nonspec_possible_truths)
3426 return false;
3427
3428 class ipa_node_params *params_summary = IPA_NODE_REF (m_node);
3429 unsigned int nargs = params_summary
3430 ? ipa_get_param_count (params_summary) : 0;
3431
3432 if (m_inline_param_summary.exists () || ctx.m_inline_param_summary.exists ())
3433 {
3434 for (unsigned int i = 0; i < nargs; i++)
3435 {
3436 if (!ipa_is_param_used_by_ipa_predicates (params_summary, i))
3437 continue;
3438 if (i >= m_inline_param_summary.length ()
3439 || m_inline_param_summary[i].useless_p ())
3440 {
3441 if (i < ctx.m_inline_param_summary.length ()
3442 && !ctx.m_inline_param_summary[i].useless_p ())
3443 return false;
3444 continue;
3445 }
3446 if (i >= ctx.m_inline_param_summary.length ()
3447 || ctx.m_inline_param_summary[i].useless_p ())
3448 {
3449 if (i < m_inline_param_summary.length ()
3450 && !m_inline_param_summary[i].useless_p ())
3451 return false;
3452 continue;
3453 }
3454 if (!m_inline_param_summary[i].equal_to
3455 (ctx.m_inline_param_summary[i]))
3456 return false;
3457 }
3458 }
3459 if (m_known_vals.exists () || ctx.m_known_vals.exists ())
3460 {
3461 for (unsigned int i = 0; i < nargs; i++)
3462 {
3463 if (!ipa_is_param_used_by_indirect_call (params_summary, i))
3464 continue;
3465 if (i >= m_known_vals.length () || !m_known_vals[i])
3466 {
3467 if (i < ctx.m_known_vals.length () && ctx.m_known_vals[i])
3468 return false;
3469 continue;
3470 }
3471 if (i >= ctx.m_known_vals.length () || !ctx.m_known_vals[i])
3472 {
3473 if (i < m_known_vals.length () && m_known_vals[i])
3474 return false;
3475 continue;
3476 }
3477 if (m_known_vals[i] != ctx.m_known_vals[i])
3478 return false;
3479 }
3480 }
3481 if (m_known_contexts.exists () || ctx.m_known_contexts.exists ())
3482 {
3483 for (unsigned int i = 0; i < nargs; i++)
3484 {
3485 if (!ipa_is_param_used_by_polymorphic_call (params_summary, i))
3486 continue;
3487 if (i >= m_known_contexts.length ()
3488 || m_known_contexts[i].useless_p ())
3489 {
3490 if (i < ctx.m_known_contexts.length ()
3491 && !ctx.m_known_contexts[i].useless_p ())
3492 return false;
3493 continue;
3494 }
3495 if (i >= ctx.m_known_contexts.length ()
3496 || ctx.m_known_contexts[i].useless_p ())
3497 {
3498 if (i < m_known_contexts.length ()
3499 && !m_known_contexts[i].useless_p ())
3500 return false;
3501 continue;
3502 }
3503 if (!m_known_contexts[i].equal_to
3504 (ctx.m_known_contexts[i]))
3505 return false;
3506 }
3507 }
3508 if (m_known_aggs.exists () || ctx.m_known_aggs.exists ())
3509 {
3510 for (unsigned int i = 0; i < nargs; i++)
3511 {
3512 if (!ipa_is_param_used_by_indirect_call (params_summary, i))
3513 continue;
3514 if (i >= m_known_aggs.length () || m_known_aggs[i].is_empty ())
3515 {
3516 if (i < ctx.m_known_aggs.length ()
3517 && !ctx.m_known_aggs[i].is_empty ())
3518 return false;
3519 continue;
3520 }
3521 if (i >= ctx.m_known_aggs.length ()
3522 || ctx.m_known_aggs[i].is_empty ())
3523 {
3524 if (i < m_known_aggs.length ()
3525 && !m_known_aggs[i].is_empty ())
3526 return false;
3527 continue;
3528 }
3529 if (!m_known_aggs[i].equal_to (ctx.m_known_aggs[i]))
3530 return false;
3531 }
3532 }
3533 return true;
3534 }
3535
3536 /* Estimate size and time needed to execute call in the given context.
3537 Additionally determine hints determined by the context. Finally compute
3538 minimal size needed for the call that is independent on the call context and
3539 can be used for fast estimates. Return the values in RET_SIZE,
3540 RET_MIN_SIZE, RET_TIME and RET_HINTS. */
3541
3542 void
estimate_size_and_time(int * ret_size,int * ret_min_size,sreal * ret_time,sreal * ret_nonspecialized_time,ipa_hints * ret_hints)3543 ipa_call_context::estimate_size_and_time (int *ret_size,
3544 int *ret_min_size,
3545 sreal *ret_time,
3546 sreal *ret_nonspecialized_time,
3547 ipa_hints *ret_hints)
3548 {
3549 class ipa_fn_summary *info = ipa_fn_summaries->get (m_node);
3550 size_time_entry *e;
3551 int size = 0;
3552 sreal time = 0;
3553 int min_size = 0;
3554 ipa_hints hints = 0;
3555 int i;
3556
3557 if (dump_file && (dump_flags & TDF_DETAILS))
3558 {
3559 bool found = false;
3560 fprintf (dump_file, " Estimating body: %s\n"
3561 " Known to be false: ", m_node->dump_name ());
3562
3563 for (i = predicate::not_inlined_condition;
3564 i < (predicate::first_dynamic_condition
3565 + (int) vec_safe_length (info->conds)); i++)
3566 if (!(m_possible_truths & (1 << i)))
3567 {
3568 if (found)
3569 fprintf (dump_file, ", ");
3570 found = true;
3571 dump_condition (dump_file, info->conds, i);
3572 }
3573 }
3574
3575 if (m_node->callees || m_node->indirect_calls)
3576 estimate_calls_size_and_time (m_node, &size, &min_size,
3577 ret_time ? &time : NULL,
3578 ret_hints ? &hints : NULL, m_possible_truths,
3579 m_known_vals, m_known_contexts, m_known_aggs);
3580
3581 sreal nonspecialized_time = time;
3582
3583 min_size += (*info->size_time_table)[0].size;
3584 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
3585 {
3586 bool exec = e->exec_predicate.evaluate (m_nonspec_possible_truths);
3587
3588 /* Because predicates are conservative, it can happen that nonconst is 1
3589 but exec is 0. */
3590 if (exec)
3591 {
3592 bool nonconst = e->nonconst_predicate.evaluate (m_possible_truths);
3593
3594 gcc_checking_assert (e->time >= 0);
3595 gcc_checking_assert (time >= 0);
3596
3597 /* We compute specialized size only because size of nonspecialized
3598 copy is context independent.
3599
3600 The difference between nonspecialized execution and specialized is
3601 that nonspecialized is not going to have optimized out computations
3602 known to be constant in a specialized setting. */
3603 if (nonconst)
3604 size += e->size;
3605 if (!ret_time)
3606 continue;
3607 nonspecialized_time += e->time;
3608 if (!nonconst)
3609 ;
3610 else if (!m_inline_param_summary.exists ())
3611 {
3612 if (nonconst)
3613 time += e->time;
3614 }
3615 else
3616 {
3617 int prob = e->nonconst_predicate.probability
3618 (info->conds, m_possible_truths,
3619 m_inline_param_summary);
3620 gcc_checking_assert (prob >= 0);
3621 gcc_checking_assert (prob <= REG_BR_PROB_BASE);
3622 if (prob == REG_BR_PROB_BASE)
3623 time += e->time;
3624 else
3625 time += e->time * prob / REG_BR_PROB_BASE;
3626 }
3627 gcc_checking_assert (time >= 0);
3628 }
3629 }
3630 gcc_checking_assert ((*info->size_time_table)[0].exec_predicate == true);
3631 gcc_checking_assert ((*info->size_time_table)[0].nonconst_predicate == true);
3632 gcc_checking_assert (min_size >= 0);
3633 gcc_checking_assert (size >= 0);
3634 gcc_checking_assert (time >= 0);
3635 /* nonspecialized_time should be always bigger than specialized time.
3636 Roundoff issues however may get into the way. */
3637 gcc_checking_assert ((nonspecialized_time - time * 99 / 100) >= -1);
3638
3639 /* Roundoff issues may make specialized time bigger than nonspecialized
3640 time. We do not really want that to happen because some heuristics
3641 may get confused by seeing negative speedups. */
3642 if (time > nonspecialized_time)
3643 time = nonspecialized_time;
3644
3645 if (ret_hints)
3646 {
3647 if (info->loop_iterations
3648 && !info->loop_iterations->evaluate (m_possible_truths))
3649 hints |= INLINE_HINT_loop_iterations;
3650 if (info->loop_stride
3651 && !info->loop_stride->evaluate (m_possible_truths))
3652 hints |= INLINE_HINT_loop_stride;
3653 if (info->scc_no)
3654 hints |= INLINE_HINT_in_scc;
3655 if (DECL_DECLARED_INLINE_P (m_node->decl))
3656 hints |= INLINE_HINT_declared_inline;
3657 }
3658
3659 size = RDIV (size, ipa_fn_summary::size_scale);
3660 min_size = RDIV (min_size, ipa_fn_summary::size_scale);
3661
3662 if (dump_file && (dump_flags & TDF_DETAILS))
3663 fprintf (dump_file, "\n size:%i time:%f nonspec time:%f\n", (int) size,
3664 time.to_double (), nonspecialized_time.to_double ());
3665 if (ret_time)
3666 *ret_time = time;
3667 if (ret_nonspecialized_time)
3668 *ret_nonspecialized_time = nonspecialized_time;
3669 if (ret_size)
3670 *ret_size = size;
3671 if (ret_min_size)
3672 *ret_min_size = min_size;
3673 if (ret_hints)
3674 *ret_hints = hints;
3675 return;
3676 }
3677
3678
3679 /* Estimate size and time needed to execute callee of EDGE assuming that
3680 parameters known to be constant at caller of EDGE are propagated.
3681 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
3682 and types for parameters. */
3683
3684 void
estimate_ipcp_clone_size_and_time(struct cgraph_node * node,vec<tree> known_vals,vec<ipa_polymorphic_call_context> known_contexts,vec<ipa_agg_value_set> known_aggs,int * ret_size,sreal * ret_time,sreal * ret_nonspec_time,ipa_hints * hints)3685 estimate_ipcp_clone_size_and_time (struct cgraph_node *node,
3686 vec<tree> known_vals,
3687 vec<ipa_polymorphic_call_context>
3688 known_contexts,
3689 vec<ipa_agg_value_set> known_aggs,
3690 int *ret_size, sreal *ret_time,
3691 sreal *ret_nonspec_time,
3692 ipa_hints *hints)
3693 {
3694 clause_t clause, nonspec_clause;
3695
3696 /* TODO: Also pass known value ranges. */
3697 evaluate_conditions_for_known_args (node, false, known_vals, vNULL,
3698 known_aggs, &clause, &nonspec_clause);
3699 ipa_call_context ctx (node, clause, nonspec_clause,
3700 known_vals, known_contexts,
3701 known_aggs, vNULL);
3702 ctx.estimate_size_and_time (ret_size, NULL, ret_time,
3703 ret_nonspec_time, hints);
3704 }
3705
3706 /* Return stack frame offset where frame of NODE is supposed to start inside
3707 of the function it is inlined to.
3708 Return 0 for functions that are not inlined. */
3709
3710 HOST_WIDE_INT
ipa_get_stack_frame_offset(struct cgraph_node * node)3711 ipa_get_stack_frame_offset (struct cgraph_node *node)
3712 {
3713 HOST_WIDE_INT offset = 0;
3714 if (!node->inlined_to)
3715 return 0;
3716 node = node->callers->caller;
3717 while (true)
3718 {
3719 offset += ipa_size_summaries->get (node)->estimated_self_stack_size;
3720 if (!node->inlined_to)
3721 return offset;
3722 node = node->callers->caller;
3723 }
3724 }
3725
3726
3727 /* Update summary information of inline clones after inlining.
3728 Compute peak stack usage. */
3729
3730 static void
inline_update_callee_summaries(struct cgraph_node * node,int depth)3731 inline_update_callee_summaries (struct cgraph_node *node, int depth)
3732 {
3733 struct cgraph_edge *e;
3734
3735 ipa_propagate_frequency (node);
3736 for (e = node->callees; e; e = e->next_callee)
3737 {
3738 if (!e->inline_failed)
3739 inline_update_callee_summaries (e->callee, depth);
3740 else
3741 ipa_call_summaries->get (e)->loop_depth += depth;
3742 }
3743 for (e = node->indirect_calls; e; e = e->next_callee)
3744 ipa_call_summaries->get (e)->loop_depth += depth;
3745 }
3746
3747 /* Update change_prob of EDGE after INLINED_EDGE has been inlined.
3748 When function A is inlined in B and A calls C with parameter that
3749 changes with probability PROB1 and C is known to be passthrough
3750 of argument if B that change with probability PROB2, the probability
3751 of change is now PROB1*PROB2. */
3752
3753 static void
remap_edge_change_prob(struct cgraph_edge * inlined_edge,struct cgraph_edge * edge)3754 remap_edge_change_prob (struct cgraph_edge *inlined_edge,
3755 struct cgraph_edge *edge)
3756 {
3757 if (ipa_node_params_sum)
3758 {
3759 int i;
3760 class ipa_edge_args *args = IPA_EDGE_REF (edge);
3761 if (!args)
3762 return;
3763 class ipa_call_summary *es = ipa_call_summaries->get (edge);
3764 class ipa_call_summary *inlined_es
3765 = ipa_call_summaries->get (inlined_edge);
3766
3767 if (es->param.length () == 0)
3768 return;
3769
3770 for (i = 0; i < ipa_get_cs_argument_count (args); i++)
3771 {
3772 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
3773 if (jfunc->type == IPA_JF_PASS_THROUGH
3774 || jfunc->type == IPA_JF_ANCESTOR)
3775 {
3776 int id = jfunc->type == IPA_JF_PASS_THROUGH
3777 ? ipa_get_jf_pass_through_formal_id (jfunc)
3778 : ipa_get_jf_ancestor_formal_id (jfunc);
3779 if (id < (int) inlined_es->param.length ())
3780 {
3781 int prob1 = es->param[i].change_prob;
3782 int prob2 = inlined_es->param[id].change_prob;
3783 int prob = combine_probabilities (prob1, prob2);
3784
3785 if (prob1 && prob2 && !prob)
3786 prob = 1;
3787
3788 es->param[i].change_prob = prob;
3789 }
3790 }
3791 }
3792 }
3793 }
3794
3795 /* Update edge summaries of NODE after INLINED_EDGE has been inlined.
3796
3797 Remap predicates of callees of NODE. Rest of arguments match
3798 remap_predicate.
3799
3800 Also update change probabilities. */
3801
3802 static void
remap_edge_summaries(struct cgraph_edge * inlined_edge,struct cgraph_node * node,class ipa_fn_summary * info,class ipa_node_params * params_summary,class ipa_fn_summary * callee_info,vec<int> operand_map,vec<int> offset_map,clause_t possible_truths,predicate * toplev_predicate)3803 remap_edge_summaries (struct cgraph_edge *inlined_edge,
3804 struct cgraph_node *node,
3805 class ipa_fn_summary *info,
3806 class ipa_node_params *params_summary,
3807 class ipa_fn_summary *callee_info,
3808 vec<int> operand_map,
3809 vec<int> offset_map,
3810 clause_t possible_truths,
3811 predicate *toplev_predicate)
3812 {
3813 struct cgraph_edge *e, *next;
3814 for (e = node->callees; e; e = next)
3815 {
3816 predicate p;
3817 next = e->next_callee;
3818
3819 if (e->inline_failed)
3820 {
3821 class ipa_call_summary *es = ipa_call_summaries->get (e);
3822 remap_edge_change_prob (inlined_edge, e);
3823
3824 if (es->predicate)
3825 {
3826 p = es->predicate->remap_after_inlining
3827 (info, params_summary,
3828 callee_info, operand_map,
3829 offset_map, possible_truths,
3830 *toplev_predicate);
3831 edge_set_predicate (e, &p);
3832 }
3833 else
3834 edge_set_predicate (e, toplev_predicate);
3835 }
3836 else
3837 remap_edge_summaries (inlined_edge, e->callee, info,
3838 params_summary, callee_info,
3839 operand_map, offset_map, possible_truths,
3840 toplev_predicate);
3841 }
3842 for (e = node->indirect_calls; e; e = next)
3843 {
3844 class ipa_call_summary *es = ipa_call_summaries->get (e);
3845 predicate p;
3846 next = e->next_callee;
3847
3848 remap_edge_change_prob (inlined_edge, e);
3849 if (es->predicate)
3850 {
3851 p = es->predicate->remap_after_inlining
3852 (info, params_summary,
3853 callee_info, operand_map, offset_map,
3854 possible_truths, *toplev_predicate);
3855 edge_set_predicate (e, &p);
3856 }
3857 else
3858 edge_set_predicate (e, toplev_predicate);
3859 }
3860 }
3861
3862 /* Same as remap_predicate, but set result into hint *HINT. */
3863
3864 static void
remap_hint_predicate(class ipa_fn_summary * info,class ipa_node_params * params_summary,class ipa_fn_summary * callee_info,predicate ** hint,vec<int> operand_map,vec<int> offset_map,clause_t possible_truths,predicate * toplev_predicate)3865 remap_hint_predicate (class ipa_fn_summary *info,
3866 class ipa_node_params *params_summary,
3867 class ipa_fn_summary *callee_info,
3868 predicate **hint,
3869 vec<int> operand_map,
3870 vec<int> offset_map,
3871 clause_t possible_truths,
3872 predicate *toplev_predicate)
3873 {
3874 predicate p;
3875
3876 if (!*hint)
3877 return;
3878 p = (*hint)->remap_after_inlining
3879 (info, params_summary, callee_info,
3880 operand_map, offset_map,
3881 possible_truths, *toplev_predicate);
3882 if (p != false && p != true)
3883 {
3884 if (!*hint)
3885 set_hint_predicate (hint, p);
3886 else
3887 **hint &= p;
3888 }
3889 }
3890
3891 /* We inlined EDGE. Update summary of the function we inlined into. */
3892
3893 void
ipa_merge_fn_summary_after_inlining(struct cgraph_edge * edge)3894 ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge)
3895 {
3896 ipa_fn_summary *callee_info = ipa_fn_summaries->get (edge->callee);
3897 struct cgraph_node *to = (edge->caller->inlined_to
3898 ? edge->caller->inlined_to : edge->caller);
3899 class ipa_fn_summary *info = ipa_fn_summaries->get (to);
3900 clause_t clause = 0; /* not_inline is known to be false. */
3901 size_time_entry *e;
3902 auto_vec<int, 8> operand_map;
3903 auto_vec<int, 8> offset_map;
3904 int i;
3905 predicate toplev_predicate;
3906 class ipa_call_summary *es = ipa_call_summaries->get (edge);
3907 class ipa_node_params *params_summary = (ipa_node_params_sum
3908 ? IPA_NODE_REF (to) : NULL);
3909
3910 if (es->predicate)
3911 toplev_predicate = *es->predicate;
3912 else
3913 toplev_predicate = true;
3914
3915 info->fp_expressions |= callee_info->fp_expressions;
3916
3917 if (callee_info->conds)
3918 {
3919 auto_vec<tree, 32> known_vals;
3920 auto_vec<ipa_agg_value_set, 32> known_aggs;
3921 evaluate_properties_for_edge (edge, true, &clause, NULL,
3922 &known_vals, NULL, &known_aggs);
3923 }
3924 if (ipa_node_params_sum && callee_info->conds)
3925 {
3926 class ipa_edge_args *args = IPA_EDGE_REF (edge);
3927 int count = args ? ipa_get_cs_argument_count (args) : 0;
3928 int i;
3929
3930 if (count)
3931 {
3932 operand_map.safe_grow_cleared (count);
3933 offset_map.safe_grow_cleared (count);
3934 }
3935 for (i = 0; i < count; i++)
3936 {
3937 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
3938 int map = -1;
3939
3940 /* TODO: handle non-NOPs when merging. */
3941 if (jfunc->type == IPA_JF_PASS_THROUGH)
3942 {
3943 if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
3944 map = ipa_get_jf_pass_through_formal_id (jfunc);
3945 if (!ipa_get_jf_pass_through_agg_preserved (jfunc))
3946 offset_map[i] = -1;
3947 }
3948 else if (jfunc->type == IPA_JF_ANCESTOR)
3949 {
3950 HOST_WIDE_INT offset = ipa_get_jf_ancestor_offset (jfunc);
3951 if (offset >= 0 && offset < INT_MAX)
3952 {
3953 map = ipa_get_jf_ancestor_formal_id (jfunc);
3954 if (!ipa_get_jf_ancestor_agg_preserved (jfunc))
3955 offset = -1;
3956 offset_map[i] = offset;
3957 }
3958 }
3959 operand_map[i] = map;
3960 gcc_assert (map < ipa_get_param_count (params_summary));
3961 }
3962 }
3963 sreal freq = edge->sreal_frequency ();
3964 for (i = 0; vec_safe_iterate (callee_info->size_time_table, i, &e); i++)
3965 {
3966 predicate p;
3967 p = e->exec_predicate.remap_after_inlining
3968 (info, params_summary,
3969 callee_info, operand_map,
3970 offset_map, clause,
3971 toplev_predicate);
3972 predicate nonconstp;
3973 nonconstp = e->nonconst_predicate.remap_after_inlining
3974 (info, params_summary,
3975 callee_info, operand_map,
3976 offset_map, clause,
3977 toplev_predicate);
3978 if (p != false && nonconstp != false)
3979 {
3980 sreal add_time = ((sreal)e->time * freq);
3981 int prob = e->nonconst_predicate.probability (callee_info->conds,
3982 clause, es->param);
3983 if (prob != REG_BR_PROB_BASE)
3984 add_time = add_time * prob / REG_BR_PROB_BASE;
3985 if (prob != REG_BR_PROB_BASE
3986 && dump_file && (dump_flags & TDF_DETAILS))
3987 {
3988 fprintf (dump_file, "\t\tScaling time by probability:%f\n",
3989 (double) prob / REG_BR_PROB_BASE);
3990 }
3991 info->account_size_time (e->size, add_time, p, nonconstp);
3992 }
3993 }
3994 remap_edge_summaries (edge, edge->callee, info, params_summary,
3995 callee_info, operand_map,
3996 offset_map, clause, &toplev_predicate);
3997 remap_hint_predicate (info, params_summary, callee_info,
3998 &callee_info->loop_iterations,
3999 operand_map, offset_map, clause, &toplev_predicate);
4000 remap_hint_predicate (info, params_summary, callee_info,
4001 &callee_info->loop_stride,
4002 operand_map, offset_map, clause, &toplev_predicate);
4003
4004 HOST_WIDE_INT stack_frame_offset = ipa_get_stack_frame_offset (edge->callee);
4005 HOST_WIDE_INT peak = stack_frame_offset + callee_info->estimated_stack_size;
4006
4007 if (info->estimated_stack_size < peak)
4008 info->estimated_stack_size = peak;
4009
4010 inline_update_callee_summaries (edge->callee, es->loop_depth);
4011 if (info->call_size_time_table)
4012 {
4013 int edge_size = 0;
4014 sreal edge_time = 0;
4015
4016 estimate_edge_size_and_time (edge, &edge_size, NULL, &edge_time, vNULL,
4017 vNULL, vNULL, 0);
4018 /* Unaccount size and time of the optimized out call. */
4019 info->account_size_time (-edge_size, -edge_time,
4020 es->predicate ? *es->predicate : true,
4021 es->predicate ? *es->predicate : true,
4022 true);
4023 /* Account new calls. */
4024 summarize_calls_size_and_time (edge->callee, info);
4025 }
4026
4027 /* Free summaries that are not maintained for inline clones/edges. */
4028 ipa_call_summaries->remove (edge);
4029 ipa_fn_summaries->remove (edge->callee);
4030 ipa_remove_from_growth_caches (edge);
4031 }
4032
4033 /* For performance reasons ipa_merge_fn_summary_after_inlining is not updating
4034 overall size and time. Recompute it.
4035 If RESET is true also recompute call_time_size_table. */
4036
4037 void
ipa_update_overall_fn_summary(struct cgraph_node * node,bool reset)4038 ipa_update_overall_fn_summary (struct cgraph_node *node, bool reset)
4039 {
4040 class ipa_fn_summary *info = ipa_fn_summaries->get (node);
4041 class ipa_size_summary *size_info = ipa_size_summaries->get (node);
4042 size_time_entry *e;
4043 int i;
4044
4045 size_info->size = 0;
4046 info->time = 0;
4047 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
4048 {
4049 size_info->size += e->size;
4050 info->time += e->time;
4051 }
4052 info->min_size = (*info->size_time_table)[0].size;
4053 if (reset)
4054 vec_free (info->call_size_time_table);
4055 if (node->callees || node->indirect_calls)
4056 estimate_calls_size_and_time (node, &size_info->size, &info->min_size,
4057 &info->time, NULL,
4058 ~(clause_t) (1 << predicate::false_condition),
4059 vNULL, vNULL, vNULL);
4060 size_info->size = RDIV (size_info->size, ipa_fn_summary::size_scale);
4061 info->min_size = RDIV (info->min_size, ipa_fn_summary::size_scale);
4062 }
4063
4064
4065 /* This function performs intraprocedural analysis in NODE that is required to
4066 inline indirect calls. */
4067
4068 static void
inline_indirect_intraprocedural_analysis(struct cgraph_node * node)4069 inline_indirect_intraprocedural_analysis (struct cgraph_node *node)
4070 {
4071 ipa_analyze_node (node);
4072 if (dump_file && (dump_flags & TDF_DETAILS))
4073 {
4074 ipa_print_node_params (dump_file, node);
4075 ipa_print_node_jump_functions (dump_file, node);
4076 }
4077 }
4078
4079
4080 /* Note function body size. */
4081
4082 void
inline_analyze_function(struct cgraph_node * node)4083 inline_analyze_function (struct cgraph_node *node)
4084 {
4085 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
4086
4087 if (dump_file)
4088 fprintf (dump_file, "\nAnalyzing function: %s\n", node->dump_name ());
4089 if (opt_for_fn (node->decl, optimize) && !node->thunk.thunk_p)
4090 inline_indirect_intraprocedural_analysis (node);
4091 compute_fn_summary (node, false);
4092 if (!optimize)
4093 {
4094 struct cgraph_edge *e;
4095 for (e = node->callees; e; e = e->next_callee)
4096 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
4097 for (e = node->indirect_calls; e; e = e->next_callee)
4098 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
4099 }
4100
4101 pop_cfun ();
4102 }
4103
4104
4105 /* Called when new function is inserted to callgraph late. */
4106
4107 void
insert(struct cgraph_node * node,ipa_fn_summary *)4108 ipa_fn_summary_t::insert (struct cgraph_node *node, ipa_fn_summary *)
4109 {
4110 inline_analyze_function (node);
4111 }
4112
4113 /* Note function body size. */
4114
4115 static void
ipa_fn_summary_generate(void)4116 ipa_fn_summary_generate (void)
4117 {
4118 struct cgraph_node *node;
4119
4120 FOR_EACH_DEFINED_FUNCTION (node)
4121 if (DECL_STRUCT_FUNCTION (node->decl))
4122 node->versionable = tree_versionable_function_p (node->decl);
4123
4124 ipa_fn_summary_alloc ();
4125
4126 ipa_fn_summaries->enable_insertion_hook ();
4127
4128 ipa_register_cgraph_hooks ();
4129
4130 FOR_EACH_DEFINED_FUNCTION (node)
4131 if (!node->alias
4132 && (flag_generate_lto || flag_generate_offload|| flag_wpa
4133 || opt_for_fn (node->decl, optimize)))
4134 inline_analyze_function (node);
4135 }
4136
4137
4138 /* Write inline summary for edge E to OB. */
4139
4140 static void
read_ipa_call_summary(class lto_input_block * ib,struct cgraph_edge * e,bool prevails)4141 read_ipa_call_summary (class lto_input_block *ib, struct cgraph_edge *e,
4142 bool prevails)
4143 {
4144 class ipa_call_summary *es = prevails
4145 ? ipa_call_summaries->get_create (e) : NULL;
4146 predicate p;
4147 int length, i;
4148
4149 int size = streamer_read_uhwi (ib);
4150 int time = streamer_read_uhwi (ib);
4151 int depth = streamer_read_uhwi (ib);
4152
4153 if (es)
4154 {
4155 es->call_stmt_size = size;
4156 es->call_stmt_time = time;
4157 es->loop_depth = depth;
4158 }
4159
4160 bitpack_d bp = streamer_read_bitpack (ib);
4161 if (es)
4162 es->is_return_callee_uncaptured = bp_unpack_value (&bp, 1);
4163 else
4164 bp_unpack_value (&bp, 1);
4165
4166 p.stream_in (ib);
4167 if (es)
4168 edge_set_predicate (e, &p);
4169 length = streamer_read_uhwi (ib);
4170 if (length && es && e->possibly_call_in_translation_unit_p ())
4171 {
4172 es->param.safe_grow_cleared (length);
4173 for (i = 0; i < length; i++)
4174 es->param[i].change_prob = streamer_read_uhwi (ib);
4175 }
4176 else
4177 {
4178 for (i = 0; i < length; i++)
4179 streamer_read_uhwi (ib);
4180 }
4181 }
4182
4183
4184 /* Stream in inline summaries from the section. */
4185
4186 static void
inline_read_section(struct lto_file_decl_data * file_data,const char * data,size_t len)4187 inline_read_section (struct lto_file_decl_data *file_data, const char *data,
4188 size_t len)
4189 {
4190 const struct lto_function_header *header =
4191 (const struct lto_function_header *) data;
4192 const int cfg_offset = sizeof (struct lto_function_header);
4193 const int main_offset = cfg_offset + header->cfg_size;
4194 const int string_offset = main_offset + header->main_size;
4195 class data_in *data_in;
4196 unsigned int i, count2, j;
4197 unsigned int f_count;
4198
4199 lto_input_block ib ((const char *) data + main_offset, header->main_size,
4200 file_data->mode_table);
4201
4202 data_in =
4203 lto_data_in_create (file_data, (const char *) data + string_offset,
4204 header->string_size, vNULL);
4205 f_count = streamer_read_uhwi (&ib);
4206 for (i = 0; i < f_count; i++)
4207 {
4208 unsigned int index;
4209 struct cgraph_node *node;
4210 class ipa_fn_summary *info;
4211 class ipa_node_params *params_summary;
4212 class ipa_size_summary *size_info;
4213 lto_symtab_encoder_t encoder;
4214 struct bitpack_d bp;
4215 struct cgraph_edge *e;
4216 predicate p;
4217
4218 index = streamer_read_uhwi (&ib);
4219 encoder = file_data->symtab_node_encoder;
4220 node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
4221 index));
4222 info = node->prevailing_p () ? ipa_fn_summaries->get_create (node) : NULL;
4223 params_summary = node->prevailing_p () ? IPA_NODE_REF (node) : NULL;
4224 size_info = node->prevailing_p ()
4225 ? ipa_size_summaries->get_create (node) : NULL;
4226
4227 int stack_size = streamer_read_uhwi (&ib);
4228 int size = streamer_read_uhwi (&ib);
4229 sreal time = sreal::stream_in (&ib);
4230
4231 if (info)
4232 {
4233 info->estimated_stack_size
4234 = size_info->estimated_self_stack_size = stack_size;
4235 size_info->size = size_info->self_size = size;
4236 info->time = time;
4237 }
4238
4239 bp = streamer_read_bitpack (&ib);
4240 if (info)
4241 {
4242 info->inlinable = bp_unpack_value (&bp, 1);
4243 /* On the side of streaming out, there is still one bit
4244 streamed out between inlinable and fp_expressions bits,
4245 which was used for cilk+ before but now always false.
4246 To remove the bit packing need to bump LTO minor version,
4247 so unpack a dummy bit here to keep consistent instead. */
4248 bp_unpack_value (&bp, 1);
4249 info->fp_expressions = bp_unpack_value (&bp, 1);
4250 }
4251 else
4252 {
4253 bp_unpack_value (&bp, 1);
4254 bp_unpack_value (&bp, 1);
4255 bp_unpack_value (&bp, 1);
4256 }
4257
4258 count2 = streamer_read_uhwi (&ib);
4259 gcc_assert (!info || !info->conds);
4260 if (info)
4261 vec_safe_reserve_exact (info->conds, count2);
4262 for (j = 0; j < count2; j++)
4263 {
4264 struct condition c;
4265 unsigned int k, count3;
4266 c.operand_num = streamer_read_uhwi (&ib);
4267 c.code = (enum tree_code) streamer_read_uhwi (&ib);
4268 c.type = stream_read_tree (&ib, data_in);
4269 c.val = stream_read_tree (&ib, data_in);
4270 bp = streamer_read_bitpack (&ib);
4271 c.agg_contents = bp_unpack_value (&bp, 1);
4272 c.by_ref = bp_unpack_value (&bp, 1);
4273 if (c.agg_contents)
4274 c.offset = streamer_read_uhwi (&ib);
4275 count3 = streamer_read_uhwi (&ib);
4276 c.param_ops = NULL;
4277 if (info)
4278 vec_safe_reserve_exact (c.param_ops, count3);
4279 if (params_summary)
4280 ipa_set_param_used_by_ipa_predicates
4281 (params_summary, c.operand_num, true);
4282 for (k = 0; k < count3; k++)
4283 {
4284 struct expr_eval_op op;
4285 enum gimple_rhs_class rhs_class;
4286 op.code = (enum tree_code) streamer_read_uhwi (&ib);
4287 op.type = stream_read_tree (&ib, data_in);
4288 switch (rhs_class = get_gimple_rhs_class (op.code))
4289 {
4290 case GIMPLE_UNARY_RHS:
4291 op.index = 0;
4292 op.val[0] = NULL_TREE;
4293 op.val[1] = NULL_TREE;
4294 break;
4295
4296 case GIMPLE_BINARY_RHS:
4297 case GIMPLE_TERNARY_RHS:
4298 bp = streamer_read_bitpack (&ib);
4299 op.index = bp_unpack_value (&bp, 2);
4300 op.val[0] = stream_read_tree (&ib, data_in);
4301 if (rhs_class == GIMPLE_BINARY_RHS)
4302 op.val[1] = NULL_TREE;
4303 else
4304 op.val[1] = stream_read_tree (&ib, data_in);
4305 break;
4306
4307 default:
4308 fatal_error (UNKNOWN_LOCATION,
4309 "invalid fnsummary in LTO stream");
4310 }
4311 if (info)
4312 c.param_ops->quick_push (op);
4313 }
4314 if (info)
4315 info->conds->quick_push (c);
4316 }
4317 count2 = streamer_read_uhwi (&ib);
4318 gcc_assert (!info || !info->size_time_table);
4319 if (info && count2)
4320 vec_safe_reserve_exact (info->size_time_table, count2);
4321 for (j = 0; j < count2; j++)
4322 {
4323 class size_time_entry e;
4324
4325 e.size = streamer_read_uhwi (&ib);
4326 e.time = sreal::stream_in (&ib);
4327 e.exec_predicate.stream_in (&ib);
4328 e.nonconst_predicate.stream_in (&ib);
4329
4330 if (info)
4331 info->size_time_table->quick_push (e);
4332 }
4333
4334 p.stream_in (&ib);
4335 if (info)
4336 set_hint_predicate (&info->loop_iterations, p);
4337 p.stream_in (&ib);
4338 if (info)
4339 set_hint_predicate (&info->loop_stride, p);
4340 for (e = node->callees; e; e = e->next_callee)
4341 read_ipa_call_summary (&ib, e, info != NULL);
4342 for (e = node->indirect_calls; e; e = e->next_callee)
4343 read_ipa_call_summary (&ib, e, info != NULL);
4344 }
4345
4346 lto_free_section_data (file_data, LTO_section_ipa_fn_summary, NULL, data,
4347 len);
4348 lto_data_in_delete (data_in);
4349 }
4350
4351
4352 /* Read inline summary. Jump functions are shared among ipa-cp
4353 and inliner, so when ipa-cp is active, we don't need to write them
4354 twice. */
4355
4356 static void
ipa_fn_summary_read(void)4357 ipa_fn_summary_read (void)
4358 {
4359 struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
4360 struct lto_file_decl_data *file_data;
4361 unsigned int j = 0;
4362
4363 ipa_prop_read_jump_functions ();
4364 ipa_fn_summary_alloc ();
4365
4366 while ((file_data = file_data_vec[j++]))
4367 {
4368 size_t len;
4369 const char *data
4370 = lto_get_summary_section_data (file_data, LTO_section_ipa_fn_summary,
4371 &len);
4372 if (data)
4373 inline_read_section (file_data, data, len);
4374 else
4375 /* Fatal error here. We do not want to support compiling ltrans units
4376 with different version of compiler or different flags than the WPA
4377 unit, so this should never happen. */
4378 fatal_error (input_location,
4379 "ipa inline summary is missing in input file");
4380 }
4381 ipa_register_cgraph_hooks ();
4382
4383 gcc_assert (ipa_fn_summaries);
4384 ipa_fn_summaries->enable_insertion_hook ();
4385 }
4386
4387
4388 /* Write inline summary for edge E to OB. */
4389
4390 static void
write_ipa_call_summary(struct output_block * ob,struct cgraph_edge * e)4391 write_ipa_call_summary (struct output_block *ob, struct cgraph_edge *e)
4392 {
4393 class ipa_call_summary *es = ipa_call_summaries->get (e);
4394 int i;
4395
4396 streamer_write_uhwi (ob, es->call_stmt_size);
4397 streamer_write_uhwi (ob, es->call_stmt_time);
4398 streamer_write_uhwi (ob, es->loop_depth);
4399
4400 bitpack_d bp = bitpack_create (ob->main_stream);
4401 bp_pack_value (&bp, es->is_return_callee_uncaptured, 1);
4402 streamer_write_bitpack (&bp);
4403
4404 if (es->predicate)
4405 es->predicate->stream_out (ob);
4406 else
4407 streamer_write_uhwi (ob, 0);
4408 streamer_write_uhwi (ob, es->param.length ());
4409 for (i = 0; i < (int) es->param.length (); i++)
4410 streamer_write_uhwi (ob, es->param[i].change_prob);
4411 }
4412
4413
4414 /* Write inline summary for node in SET.
4415 Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
4416 active, we don't need to write them twice. */
4417
4418 static void
ipa_fn_summary_write(void)4419 ipa_fn_summary_write (void)
4420 {
4421 struct output_block *ob = create_output_block (LTO_section_ipa_fn_summary);
4422 lto_symtab_encoder_iterator lsei;
4423 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
4424 unsigned int count = 0;
4425
4426 for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
4427 lsei_next_function_in_partition (&lsei))
4428 {
4429 cgraph_node *cnode = lsei_cgraph_node (lsei);
4430 if (cnode->definition && !cnode->alias)
4431 count++;
4432 }
4433 streamer_write_uhwi (ob, count);
4434
4435 for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
4436 lsei_next_function_in_partition (&lsei))
4437 {
4438 cgraph_node *cnode = lsei_cgraph_node (lsei);
4439 if (cnode->definition && !cnode->alias)
4440 {
4441 class ipa_fn_summary *info = ipa_fn_summaries->get (cnode);
4442 class ipa_size_summary *size_info = ipa_size_summaries->get (cnode);
4443 struct bitpack_d bp;
4444 struct cgraph_edge *edge;
4445 int i;
4446 size_time_entry *e;
4447 struct condition *c;
4448
4449 streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode));
4450 streamer_write_hwi (ob, size_info->estimated_self_stack_size);
4451 streamer_write_hwi (ob, size_info->self_size);
4452 info->time.stream_out (ob);
4453 bp = bitpack_create (ob->main_stream);
4454 bp_pack_value (&bp, info->inlinable, 1);
4455 bp_pack_value (&bp, false, 1);
4456 bp_pack_value (&bp, info->fp_expressions, 1);
4457 streamer_write_bitpack (&bp);
4458 streamer_write_uhwi (ob, vec_safe_length (info->conds));
4459 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
4460 {
4461 int j;
4462 struct expr_eval_op *op;
4463
4464 streamer_write_uhwi (ob, c->operand_num);
4465 streamer_write_uhwi (ob, c->code);
4466 stream_write_tree (ob, c->type, true);
4467 stream_write_tree (ob, c->val, true);
4468 bp = bitpack_create (ob->main_stream);
4469 bp_pack_value (&bp, c->agg_contents, 1);
4470 bp_pack_value (&bp, c->by_ref, 1);
4471 streamer_write_bitpack (&bp);
4472 if (c->agg_contents)
4473 streamer_write_uhwi (ob, c->offset);
4474 streamer_write_uhwi (ob, vec_safe_length (c->param_ops));
4475 for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
4476 {
4477 streamer_write_uhwi (ob, op->code);
4478 stream_write_tree (ob, op->type, true);
4479 if (op->val[0])
4480 {
4481 bp = bitpack_create (ob->main_stream);
4482 bp_pack_value (&bp, op->index, 2);
4483 streamer_write_bitpack (&bp);
4484 stream_write_tree (ob, op->val[0], true);
4485 if (op->val[1])
4486 stream_write_tree (ob, op->val[1], true);
4487 }
4488 }
4489 }
4490 streamer_write_uhwi (ob, vec_safe_length (info->size_time_table));
4491 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
4492 {
4493 streamer_write_uhwi (ob, e->size);
4494 e->time.stream_out (ob);
4495 e->exec_predicate.stream_out (ob);
4496 e->nonconst_predicate.stream_out (ob);
4497 }
4498 if (info->loop_iterations)
4499 info->loop_iterations->stream_out (ob);
4500 else
4501 streamer_write_uhwi (ob, 0);
4502 if (info->loop_stride)
4503 info->loop_stride->stream_out (ob);
4504 else
4505 streamer_write_uhwi (ob, 0);
4506 for (edge = cnode->callees; edge; edge = edge->next_callee)
4507 write_ipa_call_summary (ob, edge);
4508 for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
4509 write_ipa_call_summary (ob, edge);
4510 }
4511 }
4512 streamer_write_char_stream (ob->main_stream, 0);
4513 produce_asm (ob, NULL);
4514 destroy_output_block (ob);
4515
4516 ipa_prop_write_jump_functions ();
4517 }
4518
4519
4520 /* Release function summary. */
4521
4522 void
ipa_free_fn_summary(void)4523 ipa_free_fn_summary (void)
4524 {
4525 if (!ipa_call_summaries)
4526 return;
4527 ggc_delete (ipa_fn_summaries);
4528 ipa_fn_summaries = NULL;
4529 delete ipa_call_summaries;
4530 ipa_call_summaries = NULL;
4531 edge_predicate_pool.release ();
4532 /* During IPA this is one of largest datastructures to release. */
4533 if (flag_wpa)
4534 ggc_trim ();
4535 }
4536
4537 /* Release function summary. */
4538
4539 void
ipa_free_size_summary(void)4540 ipa_free_size_summary (void)
4541 {
4542 if (!ipa_size_summaries)
4543 return;
4544 delete ipa_size_summaries;
4545 ipa_size_summaries = NULL;
4546 }
4547
4548 namespace {
4549
4550 const pass_data pass_data_local_fn_summary =
4551 {
4552 GIMPLE_PASS, /* type */
4553 "local-fnsummary", /* name */
4554 OPTGROUP_INLINE, /* optinfo_flags */
4555 TV_INLINE_PARAMETERS, /* tv_id */
4556 0, /* properties_required */
4557 0, /* properties_provided */
4558 0, /* properties_destroyed */
4559 0, /* todo_flags_start */
4560 0, /* todo_flags_finish */
4561 };
4562
4563 class pass_local_fn_summary : public gimple_opt_pass
4564 {
4565 public:
pass_local_fn_summary(gcc::context * ctxt)4566 pass_local_fn_summary (gcc::context *ctxt)
4567 : gimple_opt_pass (pass_data_local_fn_summary, ctxt)
4568 {}
4569
4570 /* opt_pass methods: */
clone()4571 opt_pass * clone () { return new pass_local_fn_summary (m_ctxt); }
execute(function *)4572 virtual unsigned int execute (function *)
4573 {
4574 return compute_fn_summary_for_current ();
4575 }
4576
4577 }; // class pass_local_fn_summary
4578
4579 } // anon namespace
4580
4581 gimple_opt_pass *
make_pass_local_fn_summary(gcc::context * ctxt)4582 make_pass_local_fn_summary (gcc::context *ctxt)
4583 {
4584 return new pass_local_fn_summary (ctxt);
4585 }
4586
4587
4588 /* Free inline summary. */
4589
4590 namespace {
4591
4592 const pass_data pass_data_ipa_free_fn_summary =
4593 {
4594 SIMPLE_IPA_PASS, /* type */
4595 "free-fnsummary", /* name */
4596 OPTGROUP_NONE, /* optinfo_flags */
4597 TV_IPA_FREE_INLINE_SUMMARY, /* tv_id */
4598 0, /* properties_required */
4599 0, /* properties_provided */
4600 0, /* properties_destroyed */
4601 0, /* todo_flags_start */
4602 0, /* todo_flags_finish */
4603 };
4604
4605 class pass_ipa_free_fn_summary : public simple_ipa_opt_pass
4606 {
4607 public:
pass_ipa_free_fn_summary(gcc::context * ctxt)4608 pass_ipa_free_fn_summary (gcc::context *ctxt)
4609 : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary, ctxt),
4610 small_p (false)
4611 {}
4612
4613 /* opt_pass methods: */
clone()4614 opt_pass *clone () { return new pass_ipa_free_fn_summary (m_ctxt); }
set_pass_param(unsigned int n,bool param)4615 void set_pass_param (unsigned int n, bool param)
4616 {
4617 gcc_assert (n == 0);
4618 small_p = param;
4619 }
gate(function *)4620 virtual bool gate (function *) { return true; }
execute(function *)4621 virtual unsigned int execute (function *)
4622 {
4623 ipa_free_fn_summary ();
4624 if (!flag_wpa)
4625 ipa_free_size_summary ();
4626 return 0;
4627 }
4628
4629 private:
4630 bool small_p;
4631 }; // class pass_ipa_free_fn_summary
4632
4633 } // anon namespace
4634
4635 simple_ipa_opt_pass *
make_pass_ipa_free_fn_summary(gcc::context * ctxt)4636 make_pass_ipa_free_fn_summary (gcc::context *ctxt)
4637 {
4638 return new pass_ipa_free_fn_summary (ctxt);
4639 }
4640
4641 namespace {
4642
4643 const pass_data pass_data_ipa_fn_summary =
4644 {
4645 IPA_PASS, /* type */
4646 "fnsummary", /* name */
4647 OPTGROUP_INLINE, /* optinfo_flags */
4648 TV_IPA_FNSUMMARY, /* tv_id */
4649 0, /* properties_required */
4650 0, /* properties_provided */
4651 0, /* properties_destroyed */
4652 0, /* todo_flags_start */
4653 ( TODO_dump_symtab ), /* todo_flags_finish */
4654 };
4655
4656 class pass_ipa_fn_summary : public ipa_opt_pass_d
4657 {
4658 public:
pass_ipa_fn_summary(gcc::context * ctxt)4659 pass_ipa_fn_summary (gcc::context *ctxt)
4660 : ipa_opt_pass_d (pass_data_ipa_fn_summary, ctxt,
4661 ipa_fn_summary_generate, /* generate_summary */
4662 ipa_fn_summary_write, /* write_summary */
4663 ipa_fn_summary_read, /* read_summary */
4664 NULL, /* write_optimization_summary */
4665 NULL, /* read_optimization_summary */
4666 NULL, /* stmt_fixup */
4667 0, /* function_transform_todo_flags_start */
4668 NULL, /* function_transform */
4669 NULL) /* variable_transform */
4670 {}
4671
4672 /* opt_pass methods: */
execute(function *)4673 virtual unsigned int execute (function *) { return 0; }
4674
4675 }; // class pass_ipa_fn_summary
4676
4677 } // anon namespace
4678
4679 ipa_opt_pass_d *
make_pass_ipa_fn_summary(gcc::context * ctxt)4680 make_pass_ipa_fn_summary (gcc::context *ctxt)
4681 {
4682 return new pass_ipa_fn_summary (ctxt);
4683 }
4684
4685 /* Reset all state within ipa-fnsummary.c so that we can rerun the compiler
4686 within the same process. For use by toplev::finalize. */
4687
4688 void
ipa_fnsummary_c_finalize(void)4689 ipa_fnsummary_c_finalize (void)
4690 {
4691 ipa_free_fn_summary ();
4692 }
4693