/* Interprocedural constant propagation
   Copyright (C) 2005-2020 Free Software Foundation, Inc.

   Contributed by Razya Ladelsky <RAZYA@il.ibm.com> and Martin Jambor
   <mjambor@suse.cz>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Interprocedural constant propagation (IPA-CP).

   The goal of this transformation is to

   1) discover functions which are always invoked with some arguments with the
      same known constant values and modify the functions so that the
      subsequent optimizations can take advantage of the knowledge, and

   2) partial specialization - create specialized versions of functions
      transformed in this way if some parameters are known constants only in
      certain contexts but the estimated tradeoff between speedup and code
      size cost is deemed good.

   The algorithm also propagates types and attempts to perform type based
   devirtualization.  Types are propagated much like constants.

   The algorithm basically consists of three stages.  In the first, functions
   are analyzed one at a time and jump functions are constructed for all known
   call-sites.  In the second phase, the pass propagates information from the
   jump functions across the calls to reveal what values are available at what
   call sites, performs estimations of effects of known values on functions
   and their callees, and finally decides what specialized extra versions
   should be created.  In the third, the special versions materialize and
   appropriate calls are redirected.

   The algorithm used is to a certain extent based on "Interprocedural
   Constant Propagation", by David Callahan, Keith D Cooper, Ken Kennedy,
   Linda Torczon, Comp86, pg 152-161 and "A Methodology for Procedure Cloning"
   by Keith D Cooper, Mary W. Hall, and Ken Kennedy.


   First stage - intraprocedural analysis
   =======================================

   This phase computes jump_function and modification flags.

   A jump function for a call-site represents the values passed as actual
   arguments of a given call-site.  In principle, there are three types of
   values:

   Pass through - the caller's formal parameter is passed as an actual
                  argument, plus an operation on it can be performed.
   Constant - a constant is passed as an actual argument.
   Unknown - neither of the above.

   All jump function types are described in detail in ipa-prop.h, together with
   the data structures that represent them and methods of accessing them.

   ipcp_generate_summary() is the main function of the first stage.

   Second stage - interprocedural analysis
   ========================================

   This stage is itself divided into two phases.  In the first, we propagate
   known values over the call graph, in the second, we make cloning decisions.
   It uses a different algorithm from the one described in Callahan's original
   paper.

   First, we traverse the functions topologically from callers to callees and,
   for each strongly connected component (SCC), we propagate constants
   according to previously computed jump functions.  We also record what known
   values depend on other known values and estimate local effects.  Finally, we
   propagate cumulative information about these effects from dependent values
   to those on which they depend.

   Second, we again traverse the call graph in the same topological order and
   make clones for functions which we know are called with the same values in
   all contexts and decide about extra specialized clones of functions just for
   some contexts - these decisions are based on both local estimates and
   cumulative estimates propagated from callees.

   ipcp_propagate_stage() and ipcp_decision_stage() together constitute the
   second stage.

   Third phase - materialization of clones, call statement updates.
   ================================================================

   This stage is currently performed by call graph code (mainly in cgraphunit.c
   and tree-inline.c) according to instructions inserted to the call graph by
   the second stage.  */
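
/* As a purely illustrative (hypothetical) example of the above, consider:

     static int mul (int x, int y) { return x * y; }
     int a (void) { return mul (3, 4); }
     int b (void) { return mul (7, 4); }

   The second argument of MUL is the constant 4 in every context, so the
   first goal applies: the body of MUL can be optimized as if Y were always
   4.  The first argument differs between call sites, so X is only a
   candidate for the partial specialization (cloning) decisions described
   above.  */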

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple-expr.h"
#include "predict.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "gimple-fold.h"
#include "symbol-summary.h"
#include "tree-vrp.h"
#include "ipa-prop.h"
#include "tree-pretty-print.h"
#include "tree-inline.h"
#include "ipa-fnsummary.h"
#include "ipa-utils.h"
#include "tree-ssa-ccp.h"
#include "stringpool.h"
#include "attribs.h"

template <typename valtype> class ipcp_value;

/* Describes a particular source for an IPA-CP value.  */

template <typename valtype>
struct ipcp_value_source
{
public:
  /* Aggregate offset of the source, negative if the source is scalar value of
     the argument itself.  */
  HOST_WIDE_INT offset;
  /* The incoming edge that brought the value.  */
  cgraph_edge *cs;
  /* If the jump function that resulted in this value was a pass-through or an
     ancestor, this is the ipcp_value of the caller from which the described
     value has been derived.  Otherwise it is NULL.  */
  ipcp_value<valtype> *val;
  /* Next pointer in a linked list of sources of a value.  */
  ipcp_value_source *next;
  /* If the jump function that resulted in this value was a pass-through or an
     ancestor, this is the index of the parameter of the caller the jump
     function references.  */
  int index;
};

/* Common ancestor for all ipcp_value instantiations.  */

class ipcp_value_base
{
public:
  /* Time benefit and size cost that specializing the function for this value
     would bring about in this function alone.  */
  int local_time_benefit, local_size_cost;
  /* Time benefit and size cost that specializing the function for this value
     can bring about in its callees (transitively).  */
  int prop_time_benefit, prop_size_cost;

  ipcp_value_base ()
    : local_time_benefit (0), local_size_cost (0),
      prop_time_benefit (0), prop_size_cost (0) {}
};

/* Describes one particular value stored in struct ipcp_lattice.  */

template <typename valtype>
class ipcp_value : public ipcp_value_base
{
public:
  /* The actual value for the given parameter.  */
  valtype value;
  /* The list of sources from which this value originates.  */
  ipcp_value_source <valtype> *sources;
  /* Next pointers in a linked list of all values in a lattice.  */
  ipcp_value *next;
  /* Next pointers in a linked list of values in a strongly connected component
     of values.  */
  ipcp_value *scc_next;
  /* Next pointers in a linked list of SCCs of values sorted topologically
     according their sources.  */
  ipcp_value *topo_next;
  /* A specialized node created for this value, NULL if none has been (so far)
     created.  */
  cgraph_node *spec_node;
  /* Depth first search number and low link for topological sorting of
     values.  */
  int dfs, low_link;
  /* True if this value is currently on the topo-sort stack.  */
  bool on_stack;

  ipcp_value ()
    : sources (0), next (0), scc_next (0), topo_next (0),
      spec_node (0), dfs (0), low_link (0), on_stack (false) {}

  void add_source (cgraph_edge *cs, ipcp_value *src_val, int src_idx,
                   HOST_WIDE_INT offset);
};

/* Lattice describing potential values of a formal parameter of a function, or
   a part of an aggregate.  TOP is represented by a lattice with zero values
   and with contains_variable and bottom flags cleared.  BOTTOM is represented
   by a lattice with the bottom flag set.  In that case, values and
   contains_variable flag should be disregarded.  */

template <typename valtype>
struct ipcp_lattice
{
public:
  /* The list of known values and types in this lattice.  Note that values are
     not deallocated if a lattice is set to bottom because there may be value
     sources referencing them.  */
  ipcp_value<valtype> *values;
  /* Number of known values and types in this lattice.  */
  int values_count;
  /* The lattice contains a variable component (in addition to values).  */
  bool contains_variable;
  /* The value of the lattice is bottom (i.e. variable and unusable for any
     propagation).  */
  bool bottom;

  inline bool is_single_const ();
  inline bool set_to_bottom ();
  inline bool set_contains_variable ();
  bool add_value (valtype newval, cgraph_edge *cs,
                  ipcp_value<valtype> *src_val = NULL,
                  int src_idx = 0, HOST_WIDE_INT offset = -1,
                  ipcp_value<valtype> **val_p = NULL,
                  bool unlimited = false);
  void print (FILE * f, bool dump_sources, bool dump_benefits);
};
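
/* To illustrate the state encoding described above: a zero-initialized
   lattice (values == NULL, values_count == 0, both flags clear) is TOP;
   add_value moves it towards a set of known constants,
   set_contains_variable additionally marks it as VARIABLE, and
   set_to_bottom drops it to BOTTOM, a state it never leaves.  */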

/* Lattice of tree values with an offset to describe a part of an
   aggregate.  */

struct ipcp_agg_lattice : public ipcp_lattice<tree>
{
public:
  /* Offset that is being described by this lattice.  */
  HOST_WIDE_INT offset;
  /* Size so that we don't have to re-compute it every time we traverse the
     list.  Must correspond to TYPE_SIZE of all lat values.  */
  HOST_WIDE_INT size;
  /* Next element of the linked list.  */
  struct ipcp_agg_lattice *next;
};

/* Lattice of known bits, only capable of holding one value.
   Bitwise constant propagation propagates which bits of a
   value are constant.
   For eg:
   int f(int x)
   {
     return some_op (x);
   }

   int f1(int y)
   {
     if (cond)
       return f (y & 0xff);
     else
       return f (y & 0xf);
   }

   In the above case, the param 'x' will always have all
   the bits (except the bits in lsb) set to 0.
   Hence the mask of 'x' would be 0xff.  The mask
   reflects that the bits in lsb are unknown.
   The actual propagated value is given by m_value & ~m_mask.  */

class ipcp_bits_lattice
{
public:
  bool bottom_p () { return m_lattice_val == IPA_BITS_VARYING; }
  bool top_p () { return m_lattice_val == IPA_BITS_UNDEFINED; }
  bool constant_p () { return m_lattice_val == IPA_BITS_CONSTANT; }
  bool set_to_bottom ();
  bool set_to_constant (widest_int, widest_int);

  widest_int get_value () { return m_value; }
  widest_int get_mask () { return m_mask; }

  bool meet_with (ipcp_bits_lattice& other, unsigned, signop,
                  enum tree_code, tree);

  bool meet_with (widest_int, widest_int, unsigned);

  void print (FILE *);

private:
  enum { IPA_BITS_UNDEFINED, IPA_BITS_CONSTANT, IPA_BITS_VARYING } m_lattice_val;

  /* Similar to ccp_lattice_t, mask represents which bits of value are constant.
     If a bit in mask is set to 0, then the corresponding bit in
     value is known to be constant.  */
  widest_int m_value, m_mask;

  bool meet_with_1 (widest_int, widest_int, unsigned);
  void get_value_and_mask (tree, widest_int *, widest_int *);
};

/* Lattice of value ranges.  */

class ipcp_vr_lattice
{
public:
  value_range m_vr;

  inline bool bottom_p () const;
  inline bool top_p () const;
  inline bool set_to_bottom ();
  bool meet_with (const value_range *p_vr);
  bool meet_with (const ipcp_vr_lattice &other);
  void init () { gcc_assert (m_vr.undefined_p ()); }
  void print (FILE * f);

private:
  bool meet_with_1 (const value_range *other_vr);
};

/* Structure containing lattices for a parameter itself and for pieces of
   aggregates that are passed in the parameter or by reference in a parameter,
   plus some other useful flags.  */

class ipcp_param_lattices
{
public:
  /* Lattice describing the value of the parameter itself.  */
  ipcp_lattice<tree> itself;
  /* Lattice describing the polymorphic contexts of a parameter.  */
  ipcp_lattice<ipa_polymorphic_call_context> ctxlat;
  /* Lattices describing aggregate parts.  */
  ipcp_agg_lattice *aggs;
  /* Lattice describing known bits.  */
  ipcp_bits_lattice bits_lattice;
  /* Lattice describing value range.  */
  ipcp_vr_lattice m_value_range;
  /* Number of aggregate lattices.  */
  int aggs_count;
  /* True if aggregate data were passed by reference (as opposed to by
     value).  */
  bool aggs_by_ref;
  /* All aggregate lattices contain a variable component (in addition to
     values).  */
  bool aggs_contain_variable;
  /* The value of all aggregate lattices is bottom (i.e. variable and unusable
     for any propagation).  */
  bool aggs_bottom;

  /* There is a virtual call based on this parameter.  */
  bool virt_call;
};

/* Allocation pools for values and their sources in ipa-cp.  */

object_allocator<ipcp_value<tree> > ipcp_cst_values_pool
  ("IPA-CP constant values");

object_allocator<ipcp_value<ipa_polymorphic_call_context> >
  ipcp_poly_ctx_values_pool ("IPA-CP polymorphic contexts");

object_allocator<ipcp_value_source<tree> > ipcp_sources_pool
  ("IPA-CP value sources");

object_allocator<ipcp_agg_lattice> ipcp_agg_lattice_pool
  ("IPA_CP aggregate lattices");

/* Maximal count found in the program.  */

static profile_count max_count;

/* Original overall size of the program.  */

static long overall_size, orig_overall_size;

/* Node name to unique clone suffix number map.  */
static hash_map<const char *, unsigned> *clone_num_suffixes;

/* Return the param lattices structure corresponding to the Ith formal
   parameter of the function described by INFO.  */
static inline class ipcp_param_lattices *
ipa_get_parm_lattices (class ipa_node_params *info, int i)
{
  gcc_assert (i >= 0 && i < ipa_get_param_count (info));
  gcc_checking_assert (!info->ipcp_orig_node);
  gcc_checking_assert (info->lattices);
  return &(info->lattices[i]);
}

/* Return the lattice corresponding to the scalar value of the Ith formal
   parameter of the function described by INFO.  */
static inline ipcp_lattice<tree> *
ipa_get_scalar_lat (class ipa_node_params *info, int i)
{
  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
  return &plats->itself;
}

/* Return the lattice corresponding to the polymorphic-context value of the
   Ith formal parameter of the function described by INFO.  */
static inline ipcp_lattice<ipa_polymorphic_call_context> *
ipa_get_poly_ctx_lat (class ipa_node_params *info, int i)
{
  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
  return &plats->ctxlat;
}

/* Return whether LAT is a lattice with a single constant and without an
   undefined value.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::is_single_const ()
{
  if (bottom || contains_variable || values_count != 1)
    return false;
  else
    return true;
}

/* Print V which is extracted from a value in a lattice to F.  */

static void
print_ipcp_constant_value (FILE * f, tree v)
{
  if (TREE_CODE (v) == ADDR_EXPR
      && TREE_CODE (TREE_OPERAND (v, 0)) == CONST_DECL)
    {
      fprintf (f, "& ");
      print_generic_expr (f, DECL_INITIAL (TREE_OPERAND (v, 0)));
    }
  else
    print_generic_expr (f, v);
}

/* Print V which is extracted from a value in a lattice to F.  */

static void
print_ipcp_constant_value (FILE * f, ipa_polymorphic_call_context v)
{
  v.dump(f, false);
}

/* Print a lattice LAT to F.  */

template <typename valtype>
void
ipcp_lattice<valtype>::print (FILE * f, bool dump_sources, bool dump_benefits)
{
  ipcp_value<valtype> *val;
  bool prev = false;

  if (bottom)
    {
      fprintf (f, "BOTTOM\n");
      return;
    }

  if (!values_count && !contains_variable)
    {
      fprintf (f, "TOP\n");
      return;
    }

  if (contains_variable)
    {
      fprintf (f, "VARIABLE");
      prev = true;
      if (dump_benefits)
        fprintf (f, "\n");
    }

  for (val = values; val; val = val->next)
    {
      if (dump_benefits && prev)
        fprintf (f, " ");
      else if (!dump_benefits && prev)
        fprintf (f, ", ");
      else
        prev = true;

      print_ipcp_constant_value (f, val->value);

      if (dump_sources)
        {
          ipcp_value_source<valtype> *s;

          fprintf (f, " [from:");
          for (s = val->sources; s; s = s->next)
            fprintf (f, " %i(%f)", s->cs->caller->order,
                     s->cs->sreal_frequency ().to_double ());
          fprintf (f, "]");
        }

      if (dump_benefits)
        fprintf (f, " [loc_time: %i, loc_size: %i, "
                 "prop_time: %i, prop_size: %i]\n",
                 val->local_time_benefit, val->local_size_cost,
                 val->prop_time_benefit, val->prop_size_cost);
    }
  if (!dump_benefits)
    fprintf (f, "\n");
}

void
ipcp_bits_lattice::print (FILE *f)
{
  if (top_p ())
    fprintf (f, " Bits unknown (TOP)\n");
  else if (bottom_p ())
    fprintf (f, " Bits unusable (BOTTOM)\n");
  else
    {
      fprintf (f, " Bits: value = "); print_hex (get_value (), f);
      fprintf (f, ", mask = "); print_hex (get_mask (), f);
      fprintf (f, "\n");
    }
}

/* Print value range lattice to F.  */

void
ipcp_vr_lattice::print (FILE * f)
{
  dump_value_range (f, &m_vr);
}

/* Print all ipcp_lattices of all functions to F.  */

static void
print_all_lattices (FILE * f, bool dump_sources, bool dump_benefits)
{
  struct cgraph_node *node;
  int i, count;

  fprintf (f, "\nLattices:\n");
  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      class ipa_node_params *info;

      info = IPA_NODE_REF (node);
      /* Skip unoptimized functions and constprop clones since we don't make
         lattices for them.  */
      if (!info || info->ipcp_orig_node)
        continue;
      fprintf (f, " Node: %s:\n", node->dump_name ());
      count = ipa_get_param_count (info);
      for (i = 0; i < count; i++)
        {
          struct ipcp_agg_lattice *aglat;
          class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
          fprintf (f, " param [%d]: ", i);
          plats->itself.print (f, dump_sources, dump_benefits);
          fprintf (f, " ctxs: ");
          plats->ctxlat.print (f, dump_sources, dump_benefits);
          plats->bits_lattice.print (f);
          fprintf (f, " ");
          plats->m_value_range.print (f);
          fprintf (f, "\n");
          if (plats->virt_call)
            fprintf (f, " virt_call flag set\n");

          if (plats->aggs_bottom)
            {
              fprintf (f, " AGGS BOTTOM\n");
              continue;
            }
          if (plats->aggs_contain_variable)
            fprintf (f, " AGGS VARIABLE\n");
          for (aglat = plats->aggs; aglat; aglat = aglat->next)
            {
              fprintf (f, " %soffset " HOST_WIDE_INT_PRINT_DEC ": ",
                       plats->aggs_by_ref ? "ref " : "", aglat->offset);
              aglat->print (f, dump_sources, dump_benefits);
            }
        }
    }
}

/* Determine whether it is at all technically possible to create clones of NODE
   and store this information in the ipa_node_params structure associated
   with NODE.  */

static void
determine_versionability (struct cgraph_node *node,
                          class ipa_node_params *info)
{
  const char *reason = NULL;

  /* There are a number of generic reasons functions cannot be versioned.  We
     also cannot remove parameters if there are type attributes such as fnspec
     present.  */
  if (node->alias || node->thunk.thunk_p)
    reason = "alias or thunk";
  else if (!node->versionable)
    reason = "not a tree_versionable_function";
  else if (node->get_availability () <= AVAIL_INTERPOSABLE)
    reason = "insufficient body availability";
  else if (!opt_for_fn (node->decl, optimize)
           || !opt_for_fn (node->decl, flag_ipa_cp))
    reason = "non-optimized function";
  else if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (node->decl)))
    {
      /* Ideally we should clone the SIMD clones themselves and create
         vector copies of them, so IPA-cp and SIMD clones can happily
         coexist, but that may not be worth the effort.  */
      reason = "function has SIMD clones";
    }
  else if (lookup_attribute ("target_clones", DECL_ATTRIBUTES (node->decl)))
    {
      /* Ideally we should clone the target clones themselves and create
         copies of them, so IPA-cp and target clones can happily
         coexist, but that may not be worth the effort.  */
      reason = "function target_clones attribute";
    }
  /* Don't clone decls local to a comdat group; it breaks, and for C++
     decloned constructors, inlining is always better anyway.  */
  else if (node->comdat_local_p ())
    reason = "comdat-local function";
  else if (node->calls_comdat_local)
    {
      /* TODO: call is versionable if we make sure that all
         callers are inside of a comdat group.  */
      reason = "calls comdat-local function";
    }

  /* Functions calling BUILT_IN_VA_ARG_PACK and BUILT_IN_VA_ARG_PACK_LEN
     work only when inlined.  Cloning them may still lead to better code
     because ipa-cp will not give up on cloning further.  If the function is
     external this however leads to wrong code because we may end up producing
     an offline copy of the function.  */
  if (DECL_EXTERNAL (node->decl))
    for (cgraph_edge *edge = node->callees; !reason && edge;
         edge = edge->next_callee)
      if (fndecl_built_in_p (edge->callee->decl, BUILT_IN_NORMAL))
        {
          if (DECL_FUNCTION_CODE (edge->callee->decl) == BUILT_IN_VA_ARG_PACK)
            reason = "external function which calls va_arg_pack";
          if (DECL_FUNCTION_CODE (edge->callee->decl)
              == BUILT_IN_VA_ARG_PACK_LEN)
            reason = "external function which calls va_arg_pack_len";
        }

  if (reason && dump_file && !node->alias && !node->thunk.thunk_p)
    fprintf (dump_file, "Function %s is not versionable, reason: %s.\n",
             node->dump_name (), reason);

  info->versionable = (reason == NULL);
}

/* Return true if it is at all technically possible to create clones of a
   NODE.  */

static bool
ipcp_versionable_function_p (struct cgraph_node *node)
{
  return IPA_NODE_REF (node) && IPA_NODE_REF (node)->versionable;
}

/* Structure holding accumulated information about callers of a node.  */

struct caller_statistics
{
  profile_count count_sum;
  int n_calls, n_hot_calls, freq_sum;
};

/* Initialize fields of STAT to zeroes.  */

static inline void
init_caller_stats (struct caller_statistics *stats)
{
  stats->count_sum = profile_count::zero ();
  stats->n_calls = 0;
  stats->n_hot_calls = 0;
  stats->freq_sum = 0;
}

/* Worker callback of cgraph_for_node_and_aliases accumulating statistics of
   non-thunk incoming edges to NODE.  */

static bool
gather_caller_stats (struct cgraph_node *node, void *data)
{
  struct caller_statistics *stats = (struct caller_statistics *) data;
  struct cgraph_edge *cs;

  for (cs = node->callers; cs; cs = cs->next_caller)
    if (!cs->caller->thunk.thunk_p)
      {
        if (cs->count.ipa ().initialized_p ())
          stats->count_sum += cs->count.ipa ();
        stats->freq_sum += cs->frequency ();
        stats->n_calls++;
        if (cs->maybe_hot_p ())
          stats->n_hot_calls ++;
      }
  return false;
}

/* Return true if this NODE is a viable candidate for cloning.  */

static bool
ipcp_cloning_candidate_p (struct cgraph_node *node)
{
  struct caller_statistics stats;

  gcc_checking_assert (node->has_gimple_body_p ());

  if (!opt_for_fn (node->decl, flag_ipa_cp_clone))
    {
      if (dump_file)
        fprintf (dump_file, "Not considering %s for cloning; "
                 "-fipa-cp-clone disabled.\n",
                 node->dump_name ());
      return false;
    }

  if (node->optimize_for_size_p ())
    {
      if (dump_file)
        fprintf (dump_file, "Not considering %s for cloning; "
                 "optimizing it for size.\n",
                 node->dump_name ());
      return false;
    }

  init_caller_stats (&stats);
  node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats, false);

  if (ipa_size_summaries->get (node)->self_size < stats.n_calls)
    {
      if (dump_file)
        fprintf (dump_file, "Considering %s for cloning; code might shrink.\n",
                 node->dump_name ());
      return true;
    }

  /* When profile is available and function is hot, propagate into it even if
     calls seem cold; constant propagation can improve the function's speed
     significantly.  */
  if (max_count > profile_count::zero ())
    {
      if (stats.count_sum > node->count.ipa ().apply_scale (90, 100))
        {
          if (dump_file)
            fprintf (dump_file, "Considering %s for cloning; "
                     "usually called directly.\n",
                     node->dump_name ());
          return true;
        }
    }
  if (!stats.n_hot_calls)
    {
      if (dump_file)
        fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n",
                 node->dump_name ());
      return false;
    }
  if (dump_file)
    fprintf (dump_file, "Considering %s for cloning.\n",
             node->dump_name ());
  return true;
}
template <typename valtype>
class value_topo_info
{
public:
  /* Head of the linked list of topologically sorted values.  */
  ipcp_value<valtype> *values_topo;
  /* Stack for creating SCCs, represented by a linked list too.  */
  ipcp_value<valtype> *stack;
  /* Counter driving the algorithm in add_val_to_toposort.  */
  int dfs_counter;

  value_topo_info () : values_topo (NULL), stack (NULL), dfs_counter (0)
  {}
  void add_val (ipcp_value<valtype> *cur_val);
  void propagate_effects ();
};

/* Arrays representing a topological ordering of call graph nodes and a stack
   of nodes used during constant propagation and also data required to perform
   topological sort of values and propagation of benefits in the determined
   order.  */

class ipa_topo_info
{
public:
  /* Array with obtained topological order of cgraph nodes.  */
  struct cgraph_node **order;
  /* Stack of cgraph nodes used during propagation within SCC until all values
     in the SCC stabilize.  */
  struct cgraph_node **stack;
  int nnodes, stack_top;

  value_topo_info<tree> constants;
  value_topo_info<ipa_polymorphic_call_context> contexts;

  ipa_topo_info () : order(NULL), stack(NULL), nnodes(0), stack_top(0),
                     constants ()
  {}
};

/* Skip edges from and to nodes without ipa_cp enabled.
   Ignore unavailable symbols.  */

static bool
ignore_edge_p (cgraph_edge *e)
{
  enum availability avail;
  cgraph_node *ultimate_target
    = e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);

  return (avail <= AVAIL_INTERPOSABLE
          || !opt_for_fn (ultimate_target->decl, optimize)
          || !opt_for_fn (ultimate_target->decl, flag_ipa_cp));
}

/* Allocate the arrays in TOPO and topologically sort the nodes into order.  */

static void
build_toporder_info (class ipa_topo_info *topo)
{
  topo->order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
  topo->stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);

  gcc_checking_assert (topo->stack_top == 0);
  topo->nnodes = ipa_reduced_postorder (topo->order, true,
                                        ignore_edge_p);
}

/* Free information about strongly connected components and the arrays in
   TOPO.  */

static void
free_toporder_info (class ipa_topo_info *topo)
{
  ipa_free_postorder_info ();
  free (topo->order);
  free (topo->stack);
}

/* Add NODE to the stack in TOPO, unless it is already there.  */

static inline void
push_node_to_stack (class ipa_topo_info *topo, struct cgraph_node *node)
{
  class ipa_node_params *info = IPA_NODE_REF (node);
  if (info->node_enqueued)
    return;
  info->node_enqueued = 1;
  topo->stack[topo->stack_top++] = node;
}

/* Pop a node from the stack in TOPO and return it or return NULL if the stack
   is empty.  */

static struct cgraph_node *
pop_node_from_stack (class ipa_topo_info *topo)
{
  if (topo->stack_top)
    {
      struct cgraph_node *node;
      topo->stack_top--;
      node = topo->stack[topo->stack_top];
      IPA_NODE_REF (node)->node_enqueued = 0;
      return node;
    }
  else
    return NULL;
}

/* Set lattice LAT to bottom and return true if it previously was not set as
   such.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::set_to_bottom ()
{
  bool ret = !bottom;
  bottom = true;
  return ret;
}

/* Mark lattice as containing an unknown value and return true if it previously
   was not marked as such.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::set_contains_variable ()
{
  bool ret = !contains_variable;
  contains_variable = true;
  return ret;
}

/* Set all aggregate lattices in PLATS to bottom and return true if they were
   not previously set as such.  */

static inline bool
set_agg_lats_to_bottom (class ipcp_param_lattices *plats)
{
  bool ret = !plats->aggs_bottom;
  plats->aggs_bottom = true;
  return ret;
}

/* Mark all aggregate lattices in PLATS as containing an unknown value and
   return true if they were not previously marked as such.  */

static inline bool
set_agg_lats_contain_variable (class ipcp_param_lattices *plats)
{
  bool ret = !plats->aggs_contain_variable;
  plats->aggs_contain_variable = true;
  return ret;
}

/* Meet the current value of the lattice with the value range described by
   the OTHER lattice.  */

bool
ipcp_vr_lattice::meet_with (const ipcp_vr_lattice &other)
{
  return meet_with_1 (&other.m_vr);
}

/* Meet the current value of the lattice with the value range described by
   P_VR.  */

bool
ipcp_vr_lattice::meet_with (const value_range *p_vr)
{
  return meet_with_1 (p_vr);
}

/* Meet the current value of the lattice with the value range described by
   OTHER_VR.  Return TRUE if anything changed.  */

bool
ipcp_vr_lattice::meet_with_1 (const value_range *other_vr)
{
  if (bottom_p ())
    return false;

  if (other_vr->varying_p ())
    return set_to_bottom ();

  value_range save (m_vr);
  m_vr.union_ (other_vr);
  return !m_vr.equal_p (save);
}

/* Return true if value range information in the lattice is yet unknown.  */

bool
ipcp_vr_lattice::top_p () const
{
  return m_vr.undefined_p ();
}

/* Return true if value range information in the lattice is known to be
   unusable.  */

bool
ipcp_vr_lattice::bottom_p () const
{
  return m_vr.varying_p ();
}

/* Set value range information in the lattice to bottom.  Return true if it
   previously was in a different state.  */

bool
ipcp_vr_lattice::set_to_bottom ()
{
  if (m_vr.varying_p ())
    return false;
  /* ?? We create all sorts of VARYING ranges for floats, structures,
     and other types which we cannot handle as ranges.  We should
     probably avoid handling them throughout the pass, but it's easier
     to create a sensible VARYING here and let the lattice
     propagate.  */
  m_vr.set_varying (integer_type_node);
  return true;
}

/* Set lattice value to bottom, if it already isn't the case.  */

bool
ipcp_bits_lattice::set_to_bottom ()
{
  if (bottom_p ())
    return false;
  m_lattice_val = IPA_BITS_VARYING;
  m_value = 0;
  m_mask = -1;
  return true;
}

/* Set to constant if it isn't already.  Only meant to be called
   when switching state from TOP.  */

bool
ipcp_bits_lattice::set_to_constant (widest_int value, widest_int mask)
{
  gcc_assert (top_p ());
  m_lattice_val = IPA_BITS_CONSTANT;
  m_value = wi::bit_and (wi::bit_not (mask), value);
  m_mask = mask;
  return true;
}
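
/* Note on the canonicalization above (illustrative numbers): for VALUE 0x13
   and MASK 0x03, the stored m_value becomes 0x13 & ~0x03 = 0x10, so bits
   that are unknown according to the mask never appear set in m_value and
   the "m_value & ~m_mask" formula in the class description stays an
   identity.  */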

/* Convert operand to value, mask form.  */

void
ipcp_bits_lattice::get_value_and_mask (tree operand, widest_int *valuep,
                                       widest_int *maskp)
{
  wide_int get_nonzero_bits (const_tree);

  if (TREE_CODE (operand) == INTEGER_CST)
    {
      *valuep = wi::to_widest (operand);
      *maskp = 0;
    }
  else
    {
      *valuep = 0;
      *maskp = -1;
    }
}

/* Meet operation, similar to ccp_lattice_meet: we xor the two values and,
   wherever this->m_value and VALUE disagree at a bit position, drop that bit
   to varying.  Return true if the mask changed.  This function assumes that
   the lattice value is in CONSTANT state.  */

bool
ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask,
                                unsigned precision)
{
  gcc_assert (constant_p ());

  widest_int old_mask = m_mask;
  m_mask = (m_mask | mask) | (m_value ^ value);
  m_value &= ~m_mask;

  if (wi::sext (m_mask, precision) == -1)
    return set_to_bottom ();

  return m_mask != old_mask;
}
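
/* A small worked example of the meet above (purely illustrative): meeting a
   CONSTANT lattice holding value 0x10 with mask 0 against an incoming pair
   of value 0x12 and mask 0 yields m_mask = (0 | 0) | (0x10 ^ 0x12) = 0x02
   and m_value = 0x10 & ~0x02 = 0x10, i.e. bit 1 is no longer known while
   all other bits keep their previously known values.  */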

/* Meet the bits lattice with operand
   described by <value, mask, sgn, precision>.  */

bool
ipcp_bits_lattice::meet_with (widest_int value, widest_int mask,
                              unsigned precision)
{
  if (bottom_p ())
    return false;

  if (top_p ())
    {
      if (wi::sext (mask, precision) == -1)
        return set_to_bottom ();
      return set_to_constant (value, mask);
    }

  return meet_with_1 (value, mask, precision);
}

/* Meet bits lattice with the result of bit_value_binop (other, operand)
   if code is binary operation or bit_value_unop (other) if code is unary op.
   In the case when code is nop_expr, no adjustment is required.  */

bool
ipcp_bits_lattice::meet_with (ipcp_bits_lattice& other, unsigned precision,
                              signop sgn, enum tree_code code, tree operand)
{
  if (other.bottom_p ())
    return set_to_bottom ();

  if (bottom_p () || other.top_p ())
    return false;

  widest_int adjusted_value, adjusted_mask;

  if (TREE_CODE_CLASS (code) == tcc_binary)
    {
      tree type = TREE_TYPE (operand);
      widest_int o_value, o_mask;
      get_value_and_mask (operand, &o_value, &o_mask);

      bit_value_binop (code, sgn, precision, &adjusted_value, &adjusted_mask,
                       sgn, precision, other.get_value (), other.get_mask (),
                       TYPE_SIGN (type), TYPE_PRECISION (type), o_value,
                       o_mask);

      if (wi::sext (adjusted_mask, precision) == -1)
        return set_to_bottom ();
    }

  else if (TREE_CODE_CLASS (code) == tcc_unary)
    {
      bit_value_unop (code, sgn, precision, &adjusted_value,
                      &adjusted_mask, sgn, precision, other.get_value (),
                      other.get_mask ());

      if (wi::sext (adjusted_mask, precision) == -1)
        return set_to_bottom ();
    }

  else
    return set_to_bottom ();

  if (top_p ())
    {
      if (wi::sext (adjusted_mask, precision) == -1)
        return set_to_bottom ();
      return set_to_constant (adjusted_value, adjusted_mask);
    }
  else
    return meet_with_1 (adjusted_value, adjusted_mask, precision);
}

/* Mark both aggregate and scalar lattices as containing an unknown variable,
   return true if any of them has not been marked as such so far.  */

static inline bool
set_all_contains_variable (class ipcp_param_lattices *plats)
{
  bool ret;
  ret = plats->itself.set_contains_variable ();
  ret |= plats->ctxlat.set_contains_variable ();
  ret |= set_agg_lats_contain_variable (plats);
  ret |= plats->bits_lattice.set_to_bottom ();
  ret |= plats->m_value_range.set_to_bottom ();
  return ret;
}

/* Worker of call_for_symbol_thunks_and_aliases, increment the integer DATA
   points to by the number of callers to NODE.  */

static bool
count_callers (cgraph_node *node, void *data)
{
  int *caller_count = (int *) data;

  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    /* Local thunks can be handled transparently, but if the thunk cannot
       be optimized out, count it as a real use.  */
    if (!cs->caller->thunk.thunk_p || !cs->caller->local)
      ++*caller_count;
  return false;
}

/* Worker of call_for_symbol_thunks_and_aliases, it is supposed to be called on
   the one caller of some other node.  Set the caller's corresponding flag.  */

static bool
set_single_call_flag (cgraph_node *node, void *)
{
  cgraph_edge *cs = node->callers;
  /* Local thunks can be handled transparently, skip them.  */
  while (cs && cs->caller->thunk.thunk_p && cs->caller->local)
    cs = cs->next_caller;
  if (cs && IPA_NODE_REF (cs->caller))
    {
      IPA_NODE_REF (cs->caller)->node_calling_single_call = true;
      return true;
    }
  return false;
}

/* Initialize ipcp_lattices.  */

static void
initialize_node_lattices (struct cgraph_node *node)
{
  class ipa_node_params *info = IPA_NODE_REF (node);
  struct cgraph_edge *ie;
  bool disable = false, variable = false;
  int i;

  gcc_checking_assert (node->has_gimple_body_p ());

  if (!ipa_get_param_count (info))
    disable = true;
  else if (node->local)
    {
      int caller_count = 0;
      node->call_for_symbol_thunks_and_aliases (count_callers, &caller_count,
                                                true);
      gcc_checking_assert (caller_count > 0);
      if (caller_count == 1)
        node->call_for_symbol_thunks_and_aliases (set_single_call_flag,
                                                  NULL, true);
    }
  else
    {
      /* When cloning is allowed, we can assume that externally visible
         functions are not called.  We will compensate this by cloning
         later.  */
      if (ipcp_versionable_function_p (node)
          && ipcp_cloning_candidate_p (node))
        variable = true;
      else
        disable = true;
    }

  if (dump_file && (dump_flags & TDF_DETAILS)
      && !node->alias && !node->thunk.thunk_p)
    {
      fprintf (dump_file, "Initializing lattices of %s\n",
               node->dump_name ());
      if (disable || variable)
        fprintf (dump_file, " Marking all lattices as %s\n",
                 disable ? "BOTTOM" : "VARIABLE");
    }

  auto_vec<bool, 16> surviving_params;
  bool pre_modified = false;
  if (!disable && node->clone.param_adjustments)
    {
      /* At the moment all IPA optimizations should use the number of
         parameters of the prevailing decl as the m_always_copy_start.
         Handling any other value would complicate the code below, so for the
         time being let's only assert it is so.  */
      gcc_assert ((node->clone.param_adjustments->m_always_copy_start
                   == ipa_get_param_count (info))
                  || node->clone.param_adjustments->m_always_copy_start < 0);

      pre_modified = true;
      node->clone.param_adjustments->get_surviving_params (&surviving_params);

      if (dump_file && (dump_flags & TDF_DETAILS)
          && !node->alias && !node->thunk.thunk_p)
        {
          bool first = true;
          for (int j = 0; j < ipa_get_param_count (info); j++)
            {
              if (j < (int) surviving_params.length ()
                  && surviving_params[j])
                continue;
              if (first)
                {
                  fprintf (dump_file,
                           " The following parameters are dead on arrival:");
                  first = false;
                }
              fprintf (dump_file, " %u", j);
            }
          if (!first)
            fprintf (dump_file, "\n");
        }
    }

  for (i = 0; i < ipa_get_param_count (info); i++)
    {
      ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      if (disable
          || (pre_modified && (surviving_params.length () <= (unsigned) i
                               || !surviving_params[i])))
        {
          plats->itself.set_to_bottom ();
          plats->ctxlat.set_to_bottom ();
          set_agg_lats_to_bottom (plats);
          plats->bits_lattice.set_to_bottom ();
          plats->m_value_range.set_to_bottom ();
        }
      else
        {
          plats->m_value_range.init ();
          if (variable)
            set_all_contains_variable (plats);
        }
    }

  for (ie = node->indirect_calls; ie; ie = ie->next_callee)
    if (ie->indirect_info->polymorphic
        && ie->indirect_info->param_index >= 0)
      {
        gcc_checking_assert (ie->indirect_info->param_index >= 0);
        ipa_get_parm_lattices (info,
                               ie->indirect_info->param_index)->virt_call = 1;
      }
}

/* Return the result of a (possibly arithmetic) operation on the constant
   value INPUT.  OPERAND is the 2nd operand for a binary operation.  RES_TYPE
   is the type of the parameter to which the result is passed.  Return
   NULL_TREE if that cannot be determined or be considered an
   interprocedural invariant.  */

static tree
ipa_get_jf_arith_result (enum tree_code opcode, tree input, tree operand,
                         tree res_type)
{
  tree res;

  if (opcode == NOP_EXPR)
    return input;
  if (!is_gimple_ip_invariant (input))
    return NULL_TREE;

  if (!res_type)
    {
      if (TREE_CODE_CLASS (opcode) == tcc_comparison)
        res_type = boolean_type_node;
      else if (expr_type_first_operand_type_p (opcode))
        res_type = TREE_TYPE (input);
      else
        return NULL_TREE;
    }

  if (TREE_CODE_CLASS (opcode) == tcc_unary)
    res = fold_unary (opcode, res_type, input);
  else
    res = fold_binary (opcode, res_type, input, operand);

  if (res && !is_gimple_ip_invariant (res))
    return NULL_TREE;

  return res;
}
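
/* For instance (illustratively), for a jump function describing the actual
   argument "x + 1" of a call where the caller's parameter X is known to be
   the integer constant 7, this function would be invoked with opcode
   PLUS_EXPR, INPUT 7 and OPERAND 1, and fold_binary would produce the
   interprocedural invariant 8.  */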

/* Return the result of a (possibly arithmetic) pass through jump function
   JFUNC on the constant value INPUT.  RES_TYPE is the type of the parameter
   to which the result is passed.  Return NULL_TREE if that cannot be
   determined or be considered an interprocedural invariant.  */

static tree
ipa_get_jf_pass_through_result (struct ipa_jump_func *jfunc, tree input,
                                tree res_type)
{
  return ipa_get_jf_arith_result (ipa_get_jf_pass_through_operation (jfunc),
                                  input,
                                  ipa_get_jf_pass_through_operand (jfunc),
                                  res_type);
}

/* Return the result of an ancestor jump function JFUNC on the constant value
   INPUT.  Return NULL_TREE if that cannot be determined.  */

static tree
ipa_get_jf_ancestor_result (struct ipa_jump_func *jfunc, tree input)
{
  gcc_checking_assert (TREE_CODE (input) != TREE_BINFO);
  if (TREE_CODE (input) == ADDR_EXPR)
    {
      gcc_checking_assert (is_gimple_ip_invariant_address (input));
      poly_int64 off = ipa_get_jf_ancestor_offset (jfunc);
      if (known_eq (off, 0))
        return input;
      poly_int64 byte_offset = exact_div (off, BITS_PER_UNIT);
      return build1 (ADDR_EXPR, TREE_TYPE (input),
                     fold_build2 (MEM_REF, TREE_TYPE (TREE_TYPE (input)), input,
                                  build_int_cst (ptr_type_node, byte_offset)));
    }
  else
    return NULL_TREE;
}
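
/* Illustratively, if INPUT is &d for some known object D and the ancestor
   jump function records an offset of 64 bits (e.g. a conversion from a
   derived-class pointer to a base located 8 bytes into the object), the
   code above builds the invariant address &MEM[&d + 8B].  */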

/* Determine whether JFUNC evaluates to a single known constant value and if
   so, return it.  Otherwise return NULL.  INFO describes the caller node or
   the one it is inlined to, so that pass-through jump functions can be
   evaluated.  PARM_TYPE is the type of the parameter to which the result is
   passed.  */

tree
ipa_value_from_jfunc (class ipa_node_params *info, struct ipa_jump_func *jfunc,
                      tree parm_type)
{
  if (jfunc->type == IPA_JF_CONST)
    return ipa_get_jf_constant (jfunc);
  else if (jfunc->type == IPA_JF_PASS_THROUGH
           || jfunc->type == IPA_JF_ANCESTOR)
    {
      tree input;
      int idx;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
        idx = ipa_get_jf_pass_through_formal_id (jfunc);
      else
        idx = ipa_get_jf_ancestor_formal_id (jfunc);

      if (info->ipcp_orig_node)
        input = info->known_csts[idx];
      else
        {
          ipcp_lattice<tree> *lat;

          if (!info->lattices
              || idx >= ipa_get_param_count (info))
            return NULL_TREE;
          lat = ipa_get_scalar_lat (info, idx);
          if (!lat->is_single_const ())
            return NULL_TREE;
          input = lat->values->value;
        }

      if (!input)
        return NULL_TREE;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
        return ipa_get_jf_pass_through_result (jfunc, input, parm_type);
      else
        return ipa_get_jf_ancestor_result (jfunc, input);
    }
  else
    return NULL_TREE;
}

/* Determine whether JFUNC evaluates to a single known polymorphic context,
   given that INFO describes the caller node or the one it is inlined to, CS
   is the call graph edge corresponding to JFUNC and CSIDX the index of the
   described parameter.  */

ipa_polymorphic_call_context
ipa_context_from_jfunc (ipa_node_params *info, cgraph_edge *cs, int csidx,
                        ipa_jump_func *jfunc)
{
  ipa_edge_args *args = IPA_EDGE_REF (cs);
  ipa_polymorphic_call_context ctx;
  ipa_polymorphic_call_context *edge_ctx
    = cs ? ipa_get_ith_polymorhic_call_context (args, csidx) : NULL;

  if (edge_ctx && !edge_ctx->useless_p ())
    ctx = *edge_ctx;

  if (jfunc->type == IPA_JF_PASS_THROUGH
      || jfunc->type == IPA_JF_ANCESTOR)
    {
      ipa_polymorphic_call_context srcctx;
      int srcidx;
      bool type_preserved = true;
      if (jfunc->type == IPA_JF_PASS_THROUGH)
        {
          if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
            return ctx;
          type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
          srcidx = ipa_get_jf_pass_through_formal_id (jfunc);
        }
      else
        {
          type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
          srcidx = ipa_get_jf_ancestor_formal_id (jfunc);
        }
      if (info->ipcp_orig_node)
        {
          if (info->known_contexts.exists ())
            srcctx = info->known_contexts[srcidx];
        }
      else
        {
          if (!info->lattices
              || srcidx >= ipa_get_param_count (info))
            return ctx;
          ipcp_lattice<ipa_polymorphic_call_context> *lat;
          lat = ipa_get_poly_ctx_lat (info, srcidx);
          if (!lat->is_single_const ())
            return ctx;
          srcctx = lat->values->value;
        }
      if (srcctx.useless_p ())
        return ctx;
      if (jfunc->type == IPA_JF_ANCESTOR)
        srcctx.offset_by (ipa_get_jf_ancestor_offset (jfunc));
      if (!type_preserved)
        srcctx.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
      srcctx.combine_with (ctx);
      return srcctx;
    }

  return ctx;
}

/* Emulate effects of unary OPERATION and/or conversion from SRC_TYPE to
   DST_TYPE on value range in SRC_VR and store it to DST_VR.  Return true if
   the result is a range or an anti-range.  */

static bool
ipa_vr_operation_and_type_effects (value_range *dst_vr,
                                   value_range *src_vr,
                                   enum tree_code operation,
                                   tree dst_type, tree src_type)
{
  range_fold_unary_expr (dst_vr, operation, dst_type, src_vr, src_type);
  if (dst_vr->varying_p () || dst_vr->undefined_p ())
    return false;
  return true;
}
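
/* As a hypothetical example of the helper above: folding a NOP_EXPR
   conversion of the range [0, 255] of an unsigned char source into a wider
   signed int destination yields [0, 255] again; that result is neither
   varying nor undefined, so the function would return true and the caller
   may use DST_VR.  */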
1498
1499 /* Determine value_range of JFUNC given that INFO describes the caller node or
1500 the one it is inlined to, CS is the call graph edge corresponding to JFUNC
1501 and PARM_TYPE of the parameter. */
1502
1503 value_range
ipa_value_range_from_jfunc(ipa_node_params * info,cgraph_edge * cs,ipa_jump_func * jfunc,tree parm_type)1504 ipa_value_range_from_jfunc (ipa_node_params *info, cgraph_edge *cs,
1505 ipa_jump_func *jfunc, tree parm_type)
1506 {
1507 value_range vr;
1508 return vr;
1509 if (jfunc->m_vr)
1510 ipa_vr_operation_and_type_effects (&vr,
1511 jfunc->m_vr,
1512 NOP_EXPR, parm_type,
1513 jfunc->m_vr->type ());
1514 if (vr.singleton_p ())
1515 return vr;
1516 if (jfunc->type == IPA_JF_PASS_THROUGH)
1517 {
1518 int idx;
1519 ipcp_transformation *sum
1520 = ipcp_get_transformation_summary (cs->caller->inlined_to
1521 ? cs->caller->inlined_to
1522 : cs->caller);
1523 if (!sum || !sum->m_vr)
1524 return vr;
1525
1526 idx = ipa_get_jf_pass_through_formal_id (jfunc);
1527
1528 if (!(*sum->m_vr)[idx].known)
1529 return vr;
1530 tree vr_type = ipa_get_type (info, idx);
1531 value_range srcvr (wide_int_to_tree (vr_type, (*sum->m_vr)[idx].min),
1532 wide_int_to_tree (vr_type, (*sum->m_vr)[idx].max),
1533 (*sum->m_vr)[idx].type);
1534
1535 enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
1536
1537 if (TREE_CODE_CLASS (operation) == tcc_unary)
1538 {
1539 value_range res;
1540
1541 if (ipa_vr_operation_and_type_effects (&res,
1542 &srcvr,
1543 operation, parm_type,
1544 vr_type))
1545 vr.intersect (res);
1546 }
1547 else
1548 {
1549 value_range op_res, res;
1550 tree op = ipa_get_jf_pass_through_operand (jfunc);
1551 value_range op_vr (op, op);
1552
1553 range_fold_binary_expr (&op_res, operation, vr_type, &srcvr, &op_vr);
1554 if (ipa_vr_operation_and_type_effects (&res,
1555 &op_res,
1556 NOP_EXPR, parm_type,
1557 vr_type))
1558 vr.intersect (res);
1559 }
1560 }
1561 return vr;
1562 }
1563
1564 /* See if NODE is a clone with a known aggregate value at a given OFFSET of a
1565 parameter with the given INDEX. */
1566
1567 static tree
get_clone_agg_value(struct cgraph_node * node,HOST_WIDE_INT offset,int index)1568 get_clone_agg_value (struct cgraph_node *node, HOST_WIDE_INT offset,
1569 int index)
1570 {
1571 struct ipa_agg_replacement_value *aggval;
1572
1573 aggval = ipa_get_agg_replacements_for_node (node);
1574 while (aggval)
1575 {
1576 if (aggval->offset == offset
1577 && aggval->index == index)
1578 return aggval->value;
1579 aggval = aggval->next;
1580 }
1581 return NULL_TREE;
1582 }
1583
1584 /* Determine whether ITEM, jump function for an aggregate part, evaluates to a
1585 single known constant value and if so, return it. Otherwise return NULL.
1586 NODE and INFO describes the caller node or the one it is inlined to, and
1587 its related info. */
1588
1589 static tree
ipa_agg_value_from_node(class ipa_node_params * info,struct cgraph_node * node,struct ipa_agg_jf_item * item)1590 ipa_agg_value_from_node (class ipa_node_params *info,
1591 struct cgraph_node *node,
1592 struct ipa_agg_jf_item *item)
1593 {
1594 tree value = NULL_TREE;
1595 int src_idx;
1596
1597 if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
1598 return NULL_TREE;
1599
1600 if (item->jftype == IPA_JF_CONST)
1601 return item->value.constant;
1602
1603 gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
1604 || item->jftype == IPA_JF_LOAD_AGG);
1605
1606 src_idx = item->value.pass_through.formal_id;
1607
1608 if (info->ipcp_orig_node)
1609 {
1610 if (item->jftype == IPA_JF_PASS_THROUGH)
1611 value = info->known_csts[src_idx];
1612 else
1613 value = get_clone_agg_value (node, item->value.load_agg.offset,
1614 src_idx);
1615 }
1616 else if (info->lattices)
1617 {
1618 class ipcp_param_lattices *src_plats
1619 = ipa_get_parm_lattices (info, src_idx);
1620
1621 if (item->jftype == IPA_JF_PASS_THROUGH)
1622 {
1623 struct ipcp_lattice<tree> *lat = &src_plats->itself;
1624
1625 if (!lat->is_single_const ())
1626 return NULL_TREE;
1627
1628 value = lat->values->value;
1629 }
1630 else if (src_plats->aggs
1631 && !src_plats->aggs_bottom
1632 && !src_plats->aggs_contain_variable
1633 && src_plats->aggs_by_ref == item->value.load_agg.by_ref)
1634 {
1635 struct ipcp_agg_lattice *aglat;
1636
1637 for (aglat = src_plats->aggs; aglat; aglat = aglat->next)
1638 {
1639 if (aglat->offset > item->value.load_agg.offset)
1640 break;
1641
1642 if (aglat->offset == item->value.load_agg.offset)
1643 {
1644 if (aglat->is_single_const ())
1645 value = aglat->values->value;
1646 break;
1647 }
1648 }
1649 }
1650 }
1651
1652 if (!value)
1653 return NULL_TREE;
1654
1655 if (item->jftype == IPA_JF_LOAD_AGG)
1656 {
1657 tree load_type = item->value.load_agg.type;
1658 tree value_type = TREE_TYPE (value);
1659
1660 /* Ensure value type is compatible with load type. */
1661 if (!useless_type_conversion_p (load_type, value_type))
1662 return NULL_TREE;
1663 }
1664
1665 return ipa_get_jf_arith_result (item->value.pass_through.operation,
1666 value,
1667 item->value.pass_through.operand,
1668 item->type);
1669 }
1670
1671 /* Determine whether AGG_JFUNC evaluates to a set of known constant value for
1672 an aggregate and if so, return it. Otherwise return an empty set. NODE
1673 and INFO describes the caller node or the one it is inlined to, and its
1674 related info. */
1675
1676 struct ipa_agg_value_set
1677 ipa_agg_value_set_from_jfunc (class ipa_node_params *info, cgraph_node *node,
1678 struct ipa_agg_jump_function *agg_jfunc)
1679 {
1680 struct ipa_agg_value_set agg;
1681 struct ipa_agg_jf_item *item;
1682 int i;
1683
1684 agg.items = vNULL;
1685 agg.by_ref = agg_jfunc->by_ref;
1686
1687 FOR_EACH_VEC_SAFE_ELT (agg_jfunc->items, i, item)
1688 {
1689 tree value = ipa_agg_value_from_node (info, node, item);
1690
1691 if (value)
1692 {
1693 struct ipa_agg_value value_item;
1694
1695 value_item.offset = item->offset;
1696 value_item.value = value;
1697
1698 agg.items.safe_push (value_item);
1699 }
1700 }
1701 return agg;
1702 }
1703
1704 /* If checking is enabled, verify that no lattice is in the TOP state, i.e. not
1705 bottom, not containing a variable component and without any known value at
1706 the same time. */
1707
1708 DEBUG_FUNCTION void
1709 ipcp_verify_propagated_values (void)
1710 {
1711 struct cgraph_node *node;
1712
1713 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
1714 {
1715 class ipa_node_params *info = IPA_NODE_REF (node);
1716 if (!opt_for_fn (node->decl, flag_ipa_cp)
1717 || !opt_for_fn (node->decl, optimize))
1718 continue;
1719 int i, count = ipa_get_param_count (info);
1720
1721 for (i = 0; i < count; i++)
1722 {
1723 ipcp_lattice<tree> *lat = ipa_get_scalar_lat (info, i);
1724
1725 if (!lat->bottom
1726 && !lat->contains_variable
1727 && lat->values_count == 0)
1728 {
1729 if (dump_file)
1730 {
1731 symtab->dump (dump_file);
1732 fprintf (dump_file, "\nIPA lattices after constant "
1733 "propagation, before gcc_unreachable:\n");
1734 print_all_lattices (dump_file, true, false);
1735 }
1736
1737 gcc_unreachable ();
1738 }
1739 }
1740 }
1741 }
1742
1743 /* Return true iff X and Y should be considered equal values by IPA-CP. */
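/* For example, two ADDR_EXPRs of two distinct CONST_DECLs compare as equal
   values when the DECL_INITIALs of those CONST_DECLs are operand_equal_p,
   because what matters to IPA-CP is the constant behind the reference.  */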
1744
1745 static bool
1746 values_equal_for_ipcp_p (tree x, tree y)
1747 {
1748 gcc_checking_assert (x != NULL_TREE && y != NULL_TREE);
1749
1750 if (x == y)
1751 return true;
1752
1753 if (TREE_CODE (x) == ADDR_EXPR
1754 && TREE_CODE (y) == ADDR_EXPR
1755 && TREE_CODE (TREE_OPERAND (x, 0)) == CONST_DECL
1756 && TREE_CODE (TREE_OPERAND (y, 0)) == CONST_DECL)
1757 return operand_equal_p (DECL_INITIAL (TREE_OPERAND (x, 0)),
1758 DECL_INITIAL (TREE_OPERAND (y, 0)), 0);
1759 else
1760 return operand_equal_p (x, y, 0);
1761 }
1762
1763 /* Return true iff X and Y should be considered equal contexts by IPA-CP. */
1764
1765 static bool
1766 values_equal_for_ipcp_p (ipa_polymorphic_call_context x,
1767 ipa_polymorphic_call_context y)
1768 {
1769 return x.equal_to (y);
1770 }
1771
1772
/* Add a new value source to the value represented by THIS, marking that a
   value comes from edge CS and (if the underlying jump function is a
   pass-through or an ancestor one) from a caller value SRC_VAL of a caller
   parameter described by SRC_INDEX.  OFFSET is negative if the source was
   the scalar value of the parameter itself, otherwise it is the offset
   within an aggregate.  */
1778
1779 template <typename valtype>
1780 void
1781 ipcp_value<valtype>::add_source (cgraph_edge *cs, ipcp_value *src_val,
1782 int src_idx, HOST_WIDE_INT offset)
1783 {
1784 ipcp_value_source<valtype> *src;
1785
1786 src = new (ipcp_sources_pool.allocate ()) ipcp_value_source<valtype>;
1787 src->offset = offset;
1788 src->cs = cs;
1789 src->val = src_val;
1790 src->index = src_idx;
1791
1792 src->next = sources;
1793 sources = src;
1794 }
1795
1796 /* Allocate a new ipcp_value holding a tree constant, initialize its value to
1797 SOURCE and clear all other fields. */
1798
1799 static ipcp_value<tree> *
1800 allocate_and_init_ipcp_value (tree source)
1801 {
1802 ipcp_value<tree> *val;
1803
1804 val = new (ipcp_cst_values_pool.allocate ()) ipcp_value<tree>();
1805 val->value = source;
1806 return val;
1807 }
1808
1809 /* Allocate a new ipcp_value holding a polymorphic context, initialize its
1810 value to SOURCE and clear all other fields. */
1811
1812 static ipcp_value<ipa_polymorphic_call_context> *
1813 allocate_and_init_ipcp_value (ipa_polymorphic_call_context source)
1814 {
1815 ipcp_value<ipa_polymorphic_call_context> *val;
1816
1818 val = new (ipcp_poly_ctx_values_pool.allocate ())
1819 ipcp_value<ipa_polymorphic_call_context>();
1820 val->value = source;
1821 return val;
1822 }
1823
/* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it.  CS,
   SRC_VAL, SRC_INDEX and OFFSET are meant for add_source and have the same
   meaning.  OFFSET -1 means the source is scalar and not a part of an
   aggregate.  If non-NULL, VAL_P records the address of the existing or newly
   added ipcp_value.  If UNLIMITED is true, the number of values is not
   constrained by the limit given by PARAM_IPA_CP_VALUE_LIST_SIZE.  */
1830
1831 template <typename valtype>
1832 bool
1833 ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
1834 ipcp_value<valtype> *src_val,
1835 int src_idx, HOST_WIDE_INT offset,
1836 ipcp_value<valtype> **val_p,
1837 bool unlimited)
1838 {
1839 ipcp_value<valtype> *val, *last_val = NULL;
1840
1841 if (val_p)
1842 *val_p = NULL;
1843
1844 if (bottom)
1845 return false;
1846
1847 for (val = values; val; last_val = val, val = val->next)
1848 if (values_equal_for_ipcp_p (val->value, newval))
1849 {
1850 if (val_p)
1851 *val_p = val;
1852
1853 if (ipa_edge_within_scc (cs))
1854 {
1855 ipcp_value_source<valtype> *s;
1856 for (s = val->sources; s; s = s->next)
1857 if (s->cs == cs && s->val == src_val)
1858 break;
1859 if (s)
1860 return false;
1861 }
1862
1863 val->add_source (cs, src_val, src_idx, offset);
1864 return false;
1865 }
1866
1867 if (!unlimited && values_count == opt_for_fn (cs->caller->decl,
1868 param_ipa_cp_value_list_size))
1869 {
1870 /* We can only free sources, not the values themselves, because sources
1871 of other values in this SCC might point to them. */
1872 for (val = values; val; val = val->next)
1873 {
1874 while (val->sources)
1875 {
1876 ipcp_value_source<valtype> *src = val->sources;
1877 val->sources = src->next;
1878 ipcp_sources_pool.remove ((ipcp_value_source<tree>*)src);
1879 }
1880 }
1881 values = NULL;
1882 return set_to_bottom ();
1883 }
1884
1885 values_count++;
1886 val = allocate_and_init_ipcp_value (newval);
1887 val->add_source (cs, src_val, src_idx, offset);
1888 val->next = NULL;
1889
  /* Add the new value to the end of the value list, which can reduce the
     number of iterations of the propagation stage for recursive
     functions.  */
1892 if (last_val)
1893 last_val->next = val;
1894 else
1895 values = val;
1896
1897 if (val_p)
1898 *val_p = val;
1899
1900 return true;
1901 }
1902
/* Return true if the ipcp_value VAL originates from a parameter value of a
   self-feeding recursive function via some kind of pass-through jump
   function.  */
1906
1907 static bool
1908 self_recursively_generated_p (ipcp_value<tree> *val)
1909 {
1910 class ipa_node_params *info = NULL;
1911
1912 for (ipcp_value_source<tree> *src = val->sources; src; src = src->next)
1913 {
1914 cgraph_edge *cs = src->cs;
1915
1916 if (!src->val || cs->caller != cs->callee->function_symbol ())
1917 return false;
1918
1919 if (src->val == val)
1920 continue;
1921
1922 if (!info)
1923 info = IPA_NODE_REF (cs->caller);
1924
1925 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info,
1926 src->index);
1927 ipcp_lattice<tree> *src_lat;
1928 ipcp_value<tree> *src_val;
1929
1930 if (src->offset == -1)
1931 src_lat = &plats->itself;
1932 else
1933 {
1934 struct ipcp_agg_lattice *src_aglat;
1935
1936 for (src_aglat = plats->aggs; src_aglat; src_aglat = src_aglat->next)
1937 if (src_aglat->offset == src->offset)
1938 break;
1939
1940 if (!src_aglat)
1941 return false;
1942
1943 src_lat = src_aglat;
1944 }
1945
1946 for (src_val = src_lat->values; src_val; src_val = src_val->next)
1947 if (src_val == val)
1948 break;
1949
1950 if (!src_val)
1951 return false;
1952 }
1953
1954 return true;
1955 }
1956
/* A helper function that returns the result of the operation specified by
   OPCODE on the value of SRC_VAL.  If non-NULL, OPND1_TYPE is the expected
   type for the value of SRC_VAL.  If the operation is binary, OPND2 is a
   constant value acting as its second operand.  If non-NULL, RES_TYPE is the
   expected type of the result.  */
1962
1963 static tree
1964 get_val_across_arith_op (enum tree_code opcode,
1965 tree opnd1_type,
1966 tree opnd2,
1967 ipcp_value<tree> *src_val,
1968 tree res_type)
1969 {
1970 tree opnd1 = src_val->value;
1971
  /* Skip source values that are incompatible with the specified type.  */
1973 if (opnd1_type
1974 && !useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1)))
1975 return NULL_TREE;
1976
1977 return ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type);
1978 }
1979
/* Propagate values through an arithmetic transformation described by a jump
   function associated with edge CS, taking values from SRC_LAT and putting
   them into DEST_LAT.  OPND1_TYPE is the expected type for the values in
   SRC_LAT.  OPND2 is a constant value if the transformation is a binary
   operation.  SRC_OFFSET specifies the offset in an aggregate if SRC_LAT
   describes the lattice of a part of an aggregate.  SRC_IDX is the index of
   the source parameter.  RES_TYPE is the value type of the result being
   propagated into.  Return true if DEST_LAT changed.  */
1988
1989 static bool
1990 propagate_vals_across_arith_jfunc (cgraph_edge *cs,
1991 enum tree_code opcode,
1992 tree opnd1_type,
1993 tree opnd2,
1994 ipcp_lattice<tree> *src_lat,
1995 ipcp_lattice<tree> *dest_lat,
1996 HOST_WIDE_INT src_offset,
1997 int src_idx,
1998 tree res_type)
1999 {
2000 ipcp_value<tree> *src_val;
2001 bool ret = false;
2002
  /* Due to circular dependencies, propagating within an SCC through
     arithmetic transformation would create an infinite number of values.
     But for a self-feeding recursive function, we allow a limited number of
     propagations, which can enable a simple kind of recursive function
     versioning.  In other scenarios, we just make the lattices bottom.  */
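  /* For example (a hypothetical self-feeding function), in

       int foo (int i)
       {
	 if (i < 10)
	   return foo (i + 1);
	 return i;
       }

     a constant such as 0 arriving at the parameter from outside the cycle
     can seed the values 1, 2, ... up to the depth limit, enabling a chain
     of specialized clones of foo.  */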
2008 if (opcode != NOP_EXPR && ipa_edge_within_scc (cs))
2009 {
2010 int i;
2011
      int max_recursive_depth = opt_for_fn (cs->caller->decl,
					    param_ipa_cp_max_recursive_depth);
2014 if (src_lat != dest_lat || max_recursive_depth < 1)
2015 return dest_lat->set_contains_variable ();
2016
      /* No benefit if the probability of recursive execution is low.  */
2018 if (cs->sreal_frequency () * 100
2019 <= ((sreal) 1) * opt_for_fn (cs->caller->decl,
2020 param_ipa_cp_min_recursive_probability))
2021 return dest_lat->set_contains_variable ();
2022
2023 auto_vec<ipcp_value<tree> *, 8> val_seeds;
2024
2025 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2026 {
	  /* We do not use self-recursively generated values as propagation
	     sources here.  This is absolutely conservative, but avoids an
	     explosion of the lattice's value space, especially when one
	     recursive function calls another recursive one.  */
2031 if (self_recursively_generated_p (src_val))
2032 {
2033 ipcp_value_source<tree> *s;
2034
	      /* If the value has already been propagated for this call
		 site, there is no need to do it again.  */
2037 for (s = src_val->sources; s; s = s->next)
2038 if (s->cs == cs)
2039 return dest_lat->set_contains_variable ();
2040 }
2041 else
2042 val_seeds.safe_push (src_val);
2043 }
2044
2045 gcc_assert ((int) val_seeds.length () <= param_ipa_cp_value_list_size);
2046
2047 /* Recursively generate lattice values with a limited count. */
2048 FOR_EACH_VEC_ELT (val_seeds, i, src_val)
2049 {
2050 for (int j = 1; j < max_recursive_depth; j++)
2051 {
2052 tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2053 src_val, res_type);
2054 if (!cstval)
2055 break;
2056
2057 ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
2058 src_offset, &src_val, true);
2059 gcc_checking_assert (src_val);
2060 }
2061 }
2062 ret |= dest_lat->set_contains_variable ();
2063 }
2064 else
2065 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2066 {
	/* We do not use self-recursively generated values as propagation
	   sources, otherwise the value space of a normal lattice could
	   easily overflow.  */
2070 if (self_recursively_generated_p (src_val))
2071 {
2072 ret |= dest_lat->set_contains_variable ();
2073 continue;
2074 }
2075
2076 tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2077 src_val, res_type);
2078 if (cstval)
2079 ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
2080 src_offset);
2081 else
2082 ret |= dest_lat->set_contains_variable ();
2083 }
2084
2085 return ret;
2086 }
2087
2088 /* Propagate values through a pass-through jump function JFUNC associated with
2089 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2090 is the index of the source parameter. PARM_TYPE is the type of the
2091 parameter to which the result is passed. */
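/* For instance (a hypothetical caller), in

     void f (int a)
     {
       g (a + 4);
     }

   the argument of g is described by a pass-through jump function with
   formal_id 0, operation PLUS_EXPR and operand 4, so the value 7 in the
   lattice of A yields the value 11 in the lattice of g's parameter.  */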
2092
2093 static bool
2094 propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc,
2095 ipcp_lattice<tree> *src_lat,
2096 ipcp_lattice<tree> *dest_lat, int src_idx,
2097 tree parm_type)
2098 {
2099 return propagate_vals_across_arith_jfunc (cs,
2100 ipa_get_jf_pass_through_operation (jfunc),
2101 NULL_TREE,
2102 ipa_get_jf_pass_through_operand (jfunc),
2103 src_lat, dest_lat, -1, src_idx, parm_type);
2104 }
2105
2106 /* Propagate values through an ancestor jump function JFUNC associated with
2107 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2108 is the index of the source parameter. */
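/* For instance (hypothetical types), in

     struct S { int i; double f; };

     void f (struct S *s)
     {
       g (&s->f);
     }

   the argument of g is described by an ancestor jump function with
   formal_id 0 and the offset of field f (in bits), so a known value &x in
   the lattice of s yields &x.f in the lattice of g's parameter.  */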
2109
2110 static bool
2111 propagate_vals_across_ancestor (struct cgraph_edge *cs,
2112 struct ipa_jump_func *jfunc,
2113 ipcp_lattice<tree> *src_lat,
2114 ipcp_lattice<tree> *dest_lat, int src_idx)
2115 {
2116 ipcp_value<tree> *src_val;
2117 bool ret = false;
2118
2119 if (ipa_edge_within_scc (cs))
2120 return dest_lat->set_contains_variable ();
2121
2122 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2123 {
2124 tree t = ipa_get_jf_ancestor_result (jfunc, src_val->value);
2125
2126 if (t)
2127 ret |= dest_lat->add_value (t, cs, src_val, src_idx);
2128 else
2129 ret |= dest_lat->set_contains_variable ();
2130 }
2131
2132 return ret;
2133 }
2134
/* Propagate scalar values across jump function JFUNC that is associated with
   edge CS and put the values into DEST_LAT.  PARAM_TYPE is the type of the
   parameter to which the result is passed.  */
2138
2139 static bool
2140 propagate_scalar_across_jump_function (struct cgraph_edge *cs,
2141 struct ipa_jump_func *jfunc,
2142 ipcp_lattice<tree> *dest_lat,
2143 tree param_type)
2144 {
2145 if (dest_lat->bottom)
2146 return false;
2147
2148 if (jfunc->type == IPA_JF_CONST)
2149 {
2150 tree val = ipa_get_jf_constant (jfunc);
2151 return dest_lat->add_value (val, cs, NULL, 0);
2152 }
2153 else if (jfunc->type == IPA_JF_PASS_THROUGH
2154 || jfunc->type == IPA_JF_ANCESTOR)
2155 {
2156 class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
2157 ipcp_lattice<tree> *src_lat;
2158 int src_idx;
2159 bool ret;
2160
2161 if (jfunc->type == IPA_JF_PASS_THROUGH)
2162 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2163 else
2164 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2165
2166 src_lat = ipa_get_scalar_lat (caller_info, src_idx);
2167 if (src_lat->bottom)
2168 return dest_lat->set_contains_variable ();
2169
2170 /* If we would need to clone the caller and cannot, do not propagate. */
2171 if (!ipcp_versionable_function_p (cs->caller)
2172 && (src_lat->contains_variable
2173 || (src_lat->values_count > 1)))
2174 return dest_lat->set_contains_variable ();
2175
2176 if (jfunc->type == IPA_JF_PASS_THROUGH)
2177 ret = propagate_vals_across_pass_through (cs, jfunc, src_lat,
2178 dest_lat, src_idx, param_type);
2179 else
2180 ret = propagate_vals_across_ancestor (cs, jfunc, src_lat, dest_lat,
2181 src_idx);
2182
2183 if (src_lat->contains_variable)
2184 ret |= dest_lat->set_contains_variable ();
2185
2186 return ret;
2187 }
2188
  /* TODO: We currently do not handle member method pointers in IPA-CP (we
     only use them for indirect inlining); we should propagate them too.  */
2191 return dest_lat->set_contains_variable ();
2192 }
2193
/* Propagate polymorphic context values across jump function JFUNC that is
   associated with edge CS and describes argument IDX and put the values into
   DEST_LAT.  */
2196
2197 static bool
2198 propagate_context_across_jump_function (cgraph_edge *cs,
2199 ipa_jump_func *jfunc, int idx,
2200 ipcp_lattice<ipa_polymorphic_call_context> *dest_lat)
2201 {
2202 ipa_edge_args *args = IPA_EDGE_REF (cs);
2203 if (dest_lat->bottom)
2204 return false;
2205 bool ret = false;
2206 bool added_sth = false;
2207 bool type_preserved = true;
2208
2209 ipa_polymorphic_call_context edge_ctx, *edge_ctx_ptr
2210 = ipa_get_ith_polymorhic_call_context (args, idx);
2211
2212 if (edge_ctx_ptr)
2213 edge_ctx = *edge_ctx_ptr;
2214
2215 if (jfunc->type == IPA_JF_PASS_THROUGH
2216 || jfunc->type == IPA_JF_ANCESTOR)
2217 {
2218 class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
2219 int src_idx;
2220 ipcp_lattice<ipa_polymorphic_call_context> *src_lat;
2221
2222 /* TODO: Once we figure out how to propagate speculations, it will
2223 probably be a good idea to switch to speculation if type_preserved is
2224 not set instead of punting. */
2225 if (jfunc->type == IPA_JF_PASS_THROUGH)
2226 {
2227 if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
2228 goto prop_fail;
2229 type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
2230 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2231 }
2232 else
2233 {
2234 type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
2235 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2236 }
2237
2238 src_lat = ipa_get_poly_ctx_lat (caller_info, src_idx);
2239 /* If we would need to clone the caller and cannot, do not propagate. */
2240 if (!ipcp_versionable_function_p (cs->caller)
2241 && (src_lat->contains_variable
2242 || (src_lat->values_count > 1)))
2243 goto prop_fail;
2244
2245 ipcp_value<ipa_polymorphic_call_context> *src_val;
2246 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2247 {
2248 ipa_polymorphic_call_context cur = src_val->value;
2249
2250 if (!type_preserved)
2251 cur.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
2252 if (jfunc->type == IPA_JF_ANCESTOR)
2253 cur.offset_by (ipa_get_jf_ancestor_offset (jfunc));
2254 /* TODO: In cases we know how the context is going to be used,
2255 we can improve the result by passing proper OTR_TYPE. */
2256 cur.combine_with (edge_ctx);
2257 if (!cur.useless_p ())
2258 {
2259 if (src_lat->contains_variable
2260 && !edge_ctx.equal_to (cur))
2261 ret |= dest_lat->set_contains_variable ();
2262 ret |= dest_lat->add_value (cur, cs, src_val, src_idx);
2263 added_sth = true;
2264 }
2265 }
2266 }
2267
2268 prop_fail:
2269 if (!added_sth)
2270 {
2271 if (!edge_ctx.useless_p ())
2272 ret |= dest_lat->add_value (edge_ctx, cs);
2273 else
2274 ret |= dest_lat->set_contains_variable ();
2275 }
2276
2277 return ret;
2278 }
2279
/* Propagate bits across jump function JFUNC that is associated with edge CS
   and describes argument IDX, and update DEST_LATTICE accordingly.  */
2282
2283 bool
2284 propagate_bits_across_jump_function (cgraph_edge *cs, int idx,
2285 ipa_jump_func *jfunc,
2286 ipcp_bits_lattice *dest_lattice)
2287 {
2288 if (dest_lattice->bottom_p ())
2289 return false;
2290
2291 enum availability availability;
2292 cgraph_node *callee = cs->callee->function_symbol (&availability);
2293 class ipa_node_params *callee_info = IPA_NODE_REF (callee);
2294 tree parm_type = ipa_get_type (callee_info, idx);
2295
2296 /* For K&R C programs, ipa_get_type() could return NULL_TREE. Avoid the
2297 transform for these cases. Similarly, we can have bad type mismatches
2298 with LTO, avoid doing anything with those too. */
2299 if (!parm_type
2300 || (!INTEGRAL_TYPE_P (parm_type) && !POINTER_TYPE_P (parm_type)))
2301 {
2302 if (dump_file && (dump_flags & TDF_DETAILS))
2303 fprintf (dump_file, "Setting dest_lattice to bottom, because type of "
2304 "param %i of %s is NULL or unsuitable for bits propagation\n",
2305 idx, cs->callee->dump_name ());
2306
2307 return dest_lattice->set_to_bottom ();
2308 }
2309
2310 unsigned precision = TYPE_PRECISION (parm_type);
2311 signop sgn = TYPE_SIGN (parm_type);
2312
2313 if (jfunc->type == IPA_JF_PASS_THROUGH
2314 || jfunc->type == IPA_JF_ANCESTOR)
2315 {
2316 class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
2317 tree operand = NULL_TREE;
2318 enum tree_code code;
2319 unsigned src_idx;
2320
2321 if (jfunc->type == IPA_JF_PASS_THROUGH)
2322 {
2323 code = ipa_get_jf_pass_through_operation (jfunc);
2324 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2325 if (code != NOP_EXPR)
2326 operand = ipa_get_jf_pass_through_operand (jfunc);
2327 }
2328 else
2329 {
2330 code = POINTER_PLUS_EXPR;
2331 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2332 unsigned HOST_WIDE_INT offset = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
2333 operand = build_int_cstu (size_type_node, offset);
2334 }
2335
2336 class ipcp_param_lattices *src_lats
2337 = ipa_get_parm_lattices (caller_info, src_idx);
2338
      /* Try to propagate bits if src_lattice is bottom, but jfunc is known.
	 For example, consider:

	   int f (int x)
	   {
	     g (x & 0xff);
	   }

	 Assume the lattice for x is bottom.  We can still propagate the
	 known bits of x & 0xff (only the low eight bits can be non-zero),
	 which get computed during the ccp1 pass and which we store in the
	 jump function during the analysis stage.  */
2348
2349 if (src_lats->bits_lattice.bottom_p ()
2350 && jfunc->bits)
2351 return dest_lattice->meet_with (jfunc->bits->value, jfunc->bits->mask,
2352 precision);
2353 else
2354 return dest_lattice->meet_with (src_lats->bits_lattice, precision, sgn,
2355 code, operand);
2356 }
2357
2358 else if (jfunc->type == IPA_JF_ANCESTOR)
2359 return dest_lattice->set_to_bottom ();
2360 else if (jfunc->bits)
2361 return dest_lattice->meet_with (jfunc->bits->value, jfunc->bits->mask,
2362 precision);
2363 else
2364 return dest_lattice->set_to_bottom ();
2365 }
2366
/* Propagate value range across jump function JFUNC that is associated with
   edge CS and whose destination parameter in the callee has type PARAM_TYPE,
   and update DEST_PLATS accordingly.  */
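/* For example (hypothetical ranges), if the caller's lattice says the source
   parameter lies in [0, 15] and the pass-through jump function adds 1, the
   callee's lattice meets with the range [1, 16], possibly after intersecting
   it with a range recorded in the jump function itself.  */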
2370
2371 static bool
2372 propagate_vr_across_jump_function (cgraph_edge *cs, ipa_jump_func *jfunc,
2373 class ipcp_param_lattices *dest_plats,
2374 tree param_type)
2375 {
2376 ipcp_vr_lattice *dest_lat = &dest_plats->m_value_range;
2377
2378 if (dest_lat->bottom_p ())
2379 return false;
2380
2381 if (!param_type
2382 || (!INTEGRAL_TYPE_P (param_type)
2383 && !POINTER_TYPE_P (param_type)))
2384 return dest_lat->set_to_bottom ();
2385
2386 if (jfunc->type == IPA_JF_PASS_THROUGH)
2387 {
2388 enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
2389 class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
2390 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2391 class ipcp_param_lattices *src_lats
2392 = ipa_get_parm_lattices (caller_info, src_idx);
2393 tree operand_type = ipa_get_type (caller_info, src_idx);
2394
2395 if (src_lats->m_value_range.bottom_p ())
2396 return dest_lat->set_to_bottom ();
2397
2398 value_range vr;
2399 if (TREE_CODE_CLASS (operation) == tcc_unary)
2400 ipa_vr_operation_and_type_effects (&vr,
2401 &src_lats->m_value_range.m_vr,
2402 operation, param_type,
2403 operand_type);
      /* A crude way to prevent an unbounded number of value range updates
	 in SCC components.  We should allow a limited number of updates
	 within an SCC, too.  */
2407 else if (!ipa_edge_within_scc (cs))
2408 {
2409 tree op = ipa_get_jf_pass_through_operand (jfunc);
2410 value_range op_vr (op, op);
	  value_range op_res;
2412
2413 range_fold_binary_expr (&op_res, operation, operand_type,
2414 &src_lats->m_value_range.m_vr, &op_vr);
2415 ipa_vr_operation_and_type_effects (&vr,
2416 &op_res,
2417 NOP_EXPR, param_type,
2418 operand_type);
2419 }
2420 if (!vr.undefined_p () && !vr.varying_p ())
2421 {
2422 if (jfunc->m_vr)
2423 {
2424 value_range jvr;
2425 if (ipa_vr_operation_and_type_effects (&jvr, jfunc->m_vr,
2426 NOP_EXPR,
2427 param_type,
2428 jfunc->m_vr->type ()))
2429 vr.intersect (jvr);
2430 }
2431 return dest_lat->meet_with (&vr);
2432 }
2433 }
2434 else if (jfunc->type == IPA_JF_CONST)
2435 {
2436 tree val = ipa_get_jf_constant (jfunc);
2437 if (TREE_CODE (val) == INTEGER_CST)
2438 {
2439 val = fold_convert (param_type, val);
2440 if (TREE_OVERFLOW_P (val))
2441 val = drop_tree_overflow (val);
2442
2443 value_range tmpvr (val, val);
2444 return dest_lat->meet_with (&tmpvr);
2445 }
2446 }
2447
2448 value_range vr;
2449 if (jfunc->m_vr
2450 && ipa_vr_operation_and_type_effects (&vr, jfunc->m_vr, NOP_EXPR,
2451 param_type,
2452 jfunc->m_vr->type ()))
2453 return dest_lat->meet_with (&vr);
2454 else
2455 return dest_lat->set_to_bottom ();
2456 }
2457
/* If DEST_PLATS already has aggregate items, check that AGGS_BY_REF matches
   NEW_AGGS_BY_REF and if not, mark all aggs as bottom and return true (in all
   other cases, return false).  If there are no aggregate items, set
   aggs_by_ref to NEW_AGGS_BY_REF.  */
2462
2463 static bool
2464 set_check_aggs_by_ref (class ipcp_param_lattices *dest_plats,
2465 bool new_aggs_by_ref)
2466 {
2467 if (dest_plats->aggs)
2468 {
2469 if (dest_plats->aggs_by_ref != new_aggs_by_ref)
2470 {
2471 set_agg_lats_to_bottom (dest_plats);
2472 return true;
2473 }
2474 }
2475 else
2476 dest_plats->aggs_by_ref = new_aggs_by_ref;
2477 return false;
2478 }
2479
/* Walk aggregate lattices in DEST_PLATS from **AGLAT on, until **AGLAT is an
   already existing lattice for the given OFFSET and VAL_SIZE, marking all
   skipped lattices as containing a variable and checking for overlaps.  If
   there is no already existing lattice for OFFSET and VAL_SIZE, create one,
   initialize it with offset OFFSET, size VAL_SIZE and contains_variable set
   to PRE_EXISTING, and return true, unless there are too many lattices
   already.  If there are too many, return false.  If there are overlaps,
   turn the whole DEST_PLATS to bottom and return false.  If any skipped
   lattices were newly marked as containing a variable, set *CHANGE to true.
   MAX_AGG_ITEMS is the maximum number of lattices.  */
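/* For example, when lattices for offsets 0 and 64, each of size 32, already
   exist and OFFSET is 32 with VAL_SIZE 32, the lattice at offset 0 is
   skipped and marked as containing a variable and a new lattice is linked
   in between the two; had VAL_SIZE been 64, the overlap with the lattice at
   offset 64 would turn the whole DEST_PLATS to bottom instead.  */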
2489
2490 static bool
2491 merge_agg_lats_step (class ipcp_param_lattices *dest_plats,
2492 HOST_WIDE_INT offset, HOST_WIDE_INT val_size,
2493 struct ipcp_agg_lattice ***aglat,
2494 bool pre_existing, bool *change, int max_agg_items)
2495 {
2496 gcc_checking_assert (offset >= 0);
2497
2498 while (**aglat && (**aglat)->offset < offset)
2499 {
2500 if ((**aglat)->offset + (**aglat)->size > offset)
2501 {
2502 set_agg_lats_to_bottom (dest_plats);
2503 return false;
2504 }
2505 *change |= (**aglat)->set_contains_variable ();
2506 *aglat = &(**aglat)->next;
2507 }
2508
2509 if (**aglat && (**aglat)->offset == offset)
2510 {
2511 if ((**aglat)->size != val_size)
2512 {
2513 set_agg_lats_to_bottom (dest_plats);
2514 return false;
2515 }
2516 gcc_assert (!(**aglat)->next
2517 || (**aglat)->next->offset >= offset + val_size);
2518 return true;
2519 }
2520 else
2521 {
2522 struct ipcp_agg_lattice *new_al;
2523
2524 if (**aglat && (**aglat)->offset < offset + val_size)
2525 {
2526 set_agg_lats_to_bottom (dest_plats);
2527 return false;
2528 }
2529 if (dest_plats->aggs_count == max_agg_items)
2530 return false;
2531 dest_plats->aggs_count++;
2532 new_al = ipcp_agg_lattice_pool.allocate ();
2533 memset (new_al, 0, sizeof (*new_al));
2534
2535 new_al->offset = offset;
2536 new_al->size = val_size;
2537 new_al->contains_variable = pre_existing;
2538
2539 new_al->next = **aglat;
2540 **aglat = new_al;
2541 return true;
2542 }
2543 }
2544
/* Mark AGLAT and all other aggregate lattices reachable through its next
   pointers as containing an unknown value.  Return true if any of them
   changed.  */
2547
2548 static bool
2549 set_chain_of_aglats_contains_variable (struct ipcp_agg_lattice *aglat)
2550 {
2551 bool ret = false;
2552 while (aglat)
2553 {
2554 ret |= aglat->set_contains_variable ();
2555 aglat = aglat->next;
2556 }
2557 return ret;
2558 }
2559
/* Merge existing aggregate lattices in SRC_PLATS into DEST_PLATS, subtracting
   OFFSET_DELTA from the offsets.  CS is the call graph edge and SRC_IDX the
   index of the source parameter used for lattice value sources.  Return true
   if DEST_PLATS changed in any way.  */
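/* For example (a hypothetical layout), when the caller passes &s->inner,
   INNER starts at bit offset 32 and SRC_PLATS describes lattices at offsets
   0 and 64, OFFSET_DELTA is 32, so only the source lattice at offset 64
   survives, re-based to offset 32 in DEST_PLATS, while the one at offset 0
   lies before the start of INNER and is dropped.  */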
2564
2565 static bool
2566 merge_aggregate_lattices (struct cgraph_edge *cs,
2567 class ipcp_param_lattices *dest_plats,
2568 class ipcp_param_lattices *src_plats,
2569 int src_idx, HOST_WIDE_INT offset_delta)
2570 {
2571 bool pre_existing = dest_plats->aggs != NULL;
2572 struct ipcp_agg_lattice **dst_aglat;
2573 bool ret = false;
2574
2575 if (set_check_aggs_by_ref (dest_plats, src_plats->aggs_by_ref))
2576 return true;
2577 if (src_plats->aggs_bottom)
2578 return set_agg_lats_contain_variable (dest_plats);
2579 if (src_plats->aggs_contain_variable)
2580 ret |= set_agg_lats_contain_variable (dest_plats);
2581 dst_aglat = &dest_plats->aggs;
2582
2583 int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
2584 param_ipa_max_agg_items);
2585 for (struct ipcp_agg_lattice *src_aglat = src_plats->aggs;
2586 src_aglat;
2587 src_aglat = src_aglat->next)
2588 {
2589 HOST_WIDE_INT new_offset = src_aglat->offset - offset_delta;
2590
2591 if (new_offset < 0)
2592 continue;
2593 if (merge_agg_lats_step (dest_plats, new_offset, src_aglat->size,
2594 &dst_aglat, pre_existing, &ret, max_agg_items))
2595 {
2596 struct ipcp_agg_lattice *new_al = *dst_aglat;
2597
2598 dst_aglat = &(*dst_aglat)->next;
2599 if (src_aglat->bottom)
2600 {
2601 ret |= new_al->set_contains_variable ();
2602 continue;
2603 }
2604 if (src_aglat->contains_variable)
2605 ret |= new_al->set_contains_variable ();
2606 for (ipcp_value<tree> *val = src_aglat->values;
2607 val;
2608 val = val->next)
2609 ret |= new_al->add_value (val->value, cs, val, src_idx,
2610 src_aglat->offset);
2611 }
2612 else if (dest_plats->aggs_bottom)
2613 return true;
2614 }
2615 ret |= set_chain_of_aglats_contains_variable (*dst_aglat);
2616 return ret;
2617 }
2618
/* Determine whether there is anything to propagate from SRC_PLATS through a
   pass-through JFUNC and if so, whether it conforms to the rules about
   propagating values passed by reference.  */
2622
2623 static bool
2624 agg_pass_through_permissible_p (class ipcp_param_lattices *src_plats,
2625 struct ipa_jump_func *jfunc)
2626 {
2627 return src_plats->aggs
2628 && (!src_plats->aggs_by_ref
2629 || ipa_get_jf_pass_through_agg_preserved (jfunc));
2630 }
2631
/* Propagate values through ITEM, a jump function for a part of an aggregate,
   into the corresponding aggregate lattice AGLAT.  CS is the call graph edge
   associated with the jump function.  Return true if AGLAT changed in any
   way.  */
2636
2637 static bool
2638 propagate_aggregate_lattice (struct cgraph_edge *cs,
2639 struct ipa_agg_jf_item *item,
2640 struct ipcp_agg_lattice *aglat)
2641 {
2642 class ipa_node_params *caller_info;
2643 class ipcp_param_lattices *src_plats;
2644 struct ipcp_lattice<tree> *src_lat;
2645 HOST_WIDE_INT src_offset;
2646 int src_idx;
2647 tree load_type;
2648 bool ret;
2649
2650 if (item->jftype == IPA_JF_CONST)
2651 {
2652 tree value = item->value.constant;
2653
2654 gcc_checking_assert (is_gimple_ip_invariant (value));
2655 return aglat->add_value (value, cs, NULL, 0);
2656 }
2657
2658 gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
2659 || item->jftype == IPA_JF_LOAD_AGG);
2660
2661 caller_info = IPA_NODE_REF (cs->caller);
2662 src_idx = item->value.pass_through.formal_id;
2663 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
2664
2665 if (item->jftype == IPA_JF_PASS_THROUGH)
2666 {
2667 load_type = NULL_TREE;
2668 src_lat = &src_plats->itself;
2669 src_offset = -1;
2670 }
2671 else
2672 {
2673 HOST_WIDE_INT load_offset = item->value.load_agg.offset;
2674 struct ipcp_agg_lattice *src_aglat;
2675
2676 for (src_aglat = src_plats->aggs; src_aglat; src_aglat = src_aglat->next)
2677 if (src_aglat->offset >= load_offset)
2678 break;
2679
2680 load_type = item->value.load_agg.type;
2681 if (!src_aglat
2682 || src_aglat->offset > load_offset
2683 || src_aglat->size != tree_to_shwi (TYPE_SIZE (load_type))
2684 || src_plats->aggs_by_ref != item->value.load_agg.by_ref)
2685 return aglat->set_contains_variable ();
2686
2687 src_lat = src_aglat;
2688 src_offset = load_offset;
2689 }
2690
2691 if (src_lat->bottom
2692 || (!ipcp_versionable_function_p (cs->caller)
2693 && !src_lat->is_single_const ()))
2694 return aglat->set_contains_variable ();
2695
2696 ret = propagate_vals_across_arith_jfunc (cs,
2697 item->value.pass_through.operation,
2698 load_type,
2699 item->value.pass_through.operand,
2700 src_lat, aglat,
2701 src_offset,
2702 src_idx,
2703 item->type);
2704
2705 if (src_lat->contains_variable)
2706 ret |= aglat->set_contains_variable ();
2707
2708 return ret;
2709 }
2710
/* Propagate aggregate values across jump function JFUNC that is associated
   with edge CS and put them into DEST_PLATS.  */
2713
2714 static bool
2715 propagate_aggs_across_jump_function (struct cgraph_edge *cs,
2716 struct ipa_jump_func *jfunc,
2717 class ipcp_param_lattices *dest_plats)
2718 {
2719 bool ret = false;
2720
2721 if (dest_plats->aggs_bottom)
2722 return false;
2723
2724 if (jfunc->type == IPA_JF_PASS_THROUGH
2725 && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
2726 {
2727 class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
2728 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2729 class ipcp_param_lattices *src_plats;
2730
2731 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
2732 if (agg_pass_through_permissible_p (src_plats, jfunc))
2733 {
2734 /* Currently we do not produce clobber aggregate jump
2735 functions, replace with merging when we do. */
2736 gcc_assert (!jfunc->agg.items);
2737 ret |= merge_aggregate_lattices (cs, dest_plats, src_plats,
2738 src_idx, 0);
2739 }
2740 else
2741 ret |= set_agg_lats_contain_variable (dest_plats);
2742 }
2743 else if (jfunc->type == IPA_JF_ANCESTOR
2744 && ipa_get_jf_ancestor_agg_preserved (jfunc))
2745 {
2746 class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
2747 int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2748 class ipcp_param_lattices *src_plats;
2749
2750 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
2751 if (src_plats->aggs && src_plats->aggs_by_ref)
2752 {
2753 /* Currently we do not produce clobber aggregate jump
2754 functions, replace with merging when we do. */
2755 gcc_assert (!jfunc->agg.items);
2756 ret |= merge_aggregate_lattices (cs, dest_plats, src_plats, src_idx,
2757 ipa_get_jf_ancestor_offset (jfunc));
2758 }
2759 else if (!src_plats->aggs_by_ref)
2760 ret |= set_agg_lats_to_bottom (dest_plats);
2761 else
2762 ret |= set_agg_lats_contain_variable (dest_plats);
2763 }
2764 else if (jfunc->agg.items)
2765 {
2766 bool pre_existing = dest_plats->aggs != NULL;
2767 struct ipcp_agg_lattice **aglat = &dest_plats->aggs;
2768 struct ipa_agg_jf_item *item;
2769 int i;
2770
2771 if (set_check_aggs_by_ref (dest_plats, jfunc->agg.by_ref))
2772 return true;
2773
2774 int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
2775 param_ipa_max_agg_items);
2776 FOR_EACH_VEC_ELT (*jfunc->agg.items, i, item)
2777 {
2778 HOST_WIDE_INT val_size;
2779
2780 if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
2781 continue;
2782 val_size = tree_to_shwi (TYPE_SIZE (item->type));
2783
2784 if (merge_agg_lats_step (dest_plats, item->offset, val_size,
2785 &aglat, pre_existing, &ret, max_agg_items))
2786 {
2787 ret |= propagate_aggregate_lattice (cs, item, *aglat);
2788 aglat = &(*aglat)->next;
2789 }
2790 else if (dest_plats->aggs_bottom)
2791 return true;
2792 }
2793
2794 ret |= set_chain_of_aglats_contains_variable (*aglat);
2795 }
2796 else
2797 ret |= set_agg_lats_contain_variable (dest_plats);
2798
2799 return ret;
2800 }
2801
/* Return true if on the way from CS->caller to the final (non-alias and
   non-thunk) destination, the call passes through a thunk.  */
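/* For example (hypothetical classes), in

     struct A { virtual void f (); };
     struct B { virtual void g (); };
     struct C : A, B { void g (); };

   calling C::g through a B * goes through a this-adjusting thunk, so the
   0th argument observed at the call site is not the value that the final
   destination receives.  */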
2804
2805 static bool
2806 call_passes_through_thunk_p (cgraph_edge *cs)
2807 {
2808 cgraph_node *alias_or_thunk = cs->callee;
2809 while (alias_or_thunk->alias)
2810 alias_or_thunk = alias_or_thunk->get_alias_target ();
2811 return alias_or_thunk->thunk.thunk_p;
2812 }
2813
/* Propagate constants from the caller to the callee of CS.  Return true if
   any lattice of the callee changed.  */
2816
2817 static bool
2818 propagate_constants_across_call (struct cgraph_edge *cs)
2819 {
2820 class ipa_node_params *callee_info;
2821 enum availability availability;
2822 cgraph_node *callee;
2823 class ipa_edge_args *args;
2824 bool ret = false;
2825 int i, args_count, parms_count;
2826
2827 callee = cs->callee->function_symbol (&availability);
2828 if (!callee->definition)
2829 return false;
2830 gcc_checking_assert (callee->has_gimple_body_p ());
2831 callee_info = IPA_NODE_REF (callee);
2832 if (!callee_info)
2833 return false;
2834
2835 args = IPA_EDGE_REF (cs);
2836 parms_count = ipa_get_param_count (callee_info);
2837 if (parms_count == 0)
2838 return false;
2839 if (!args
2840 || !opt_for_fn (cs->caller->decl, flag_ipa_cp)
2841 || !opt_for_fn (cs->caller->decl, optimize))
2842 {
2843 for (i = 0; i < parms_count; i++)
2844 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
2845 i));
2846 return ret;
2847 }
2848 args_count = ipa_get_cs_argument_count (args);
2849
2850 /* If this call goes through a thunk we must not propagate to the first (0th)
2851 parameter. However, we might need to uncover a thunk from below a series
2852 of aliases first. */
2853 if (call_passes_through_thunk_p (cs))
2854 {
2855 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
2856 0));
2857 i = 1;
2858 }
2859 else
2860 i = 0;
2861
2862 for (; (i < args_count) && (i < parms_count); i++)
2863 {
2864 struct ipa_jump_func *jump_func = ipa_get_ith_jump_func (args, i);
2865 class ipcp_param_lattices *dest_plats;
2866 tree param_type = ipa_get_type (callee_info, i);
2867
2868 dest_plats = ipa_get_parm_lattices (callee_info, i);
2869 if (availability == AVAIL_INTERPOSABLE)
2870 ret |= set_all_contains_variable (dest_plats);
2871 else
2872 {
2873 ret |= propagate_scalar_across_jump_function (cs, jump_func,
2874 &dest_plats->itself,
2875 param_type);
2876 ret |= propagate_context_across_jump_function (cs, jump_func, i,
2877 &dest_plats->ctxlat);
2878 ret
2879 |= propagate_bits_across_jump_function (cs, i, jump_func,
2880 &dest_plats->bits_lattice);
2881 ret |= propagate_aggs_across_jump_function (cs, jump_func,
2882 dest_plats);
2883 if (opt_for_fn (callee->decl, flag_ipa_vrp))
2884 ret |= propagate_vr_across_jump_function (cs, jump_func,
2885 dest_plats, param_type);
2886 else
2887 ret |= dest_plats->m_value_range.set_to_bottom ();
2888 }
2889 }
2890 for (; i < parms_count; i++)
2891 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info, i));
2892
2893 return ret;
2894 }
2895
/* If an indirect edge IE can be turned into a direct one based on KNOWN_CSTS,
   KNOWN_CONTEXTS, KNOWN_AGGS or AGG_REPS, return the destination.  The latter
   three can be NULL.  If AGG_REPS is not NULL, KNOWN_AGGS is ignored.  */
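/* For example (a hypothetical situation), if the relevant parameter is known
   to be &x, or if the known aggregate values pin down the virtual table
   pointer stored in x, the method the corresponding vtable slot refers to
   becomes the direct destination, possibly only speculatively when the
   dynamic type may have changed.  */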
2899
2900 static tree
2901 ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie,
2902 vec<tree> known_csts,
2903 vec<ipa_polymorphic_call_context> known_contexts,
2904 vec<ipa_agg_value_set> known_aggs,
2905 struct ipa_agg_replacement_value *agg_reps,
2906 bool *speculative)
2907 {
2908 int param_index = ie->indirect_info->param_index;
2909 HOST_WIDE_INT anc_offset;
2910 tree t = NULL;
2911 tree target = NULL;
2912
2913 *speculative = false;
2914
2915 if (param_index == -1)
2916 return NULL_TREE;
2917
2918 if (!ie->indirect_info->polymorphic)
2919 {
2920 tree t = NULL;
2921
2922 if (ie->indirect_info->agg_contents)
2923 {
2924 t = NULL;
2925 if (agg_reps && ie->indirect_info->guaranteed_unmodified)
2926 {
2927 while (agg_reps)
2928 {
2929 if (agg_reps->index == param_index
2930 && agg_reps->offset == ie->indirect_info->offset
2931 && agg_reps->by_ref == ie->indirect_info->by_ref)
2932 {
2933 t = agg_reps->value;
2934 break;
2935 }
2936 agg_reps = agg_reps->next;
2937 }
2938 }
2939 if (!t)
2940 {
2941 struct ipa_agg_value_set *agg;
2942 if (known_aggs.length () > (unsigned int) param_index)
2943 agg = &known_aggs[param_index];
2944 else
2945 agg = NULL;
2946 bool from_global_constant;
2947 t = ipa_find_agg_cst_for_param (agg,
2948 (unsigned) param_index
2949 < known_csts.length ()
2950 ? known_csts[param_index]
2951 : NULL,
2952 ie->indirect_info->offset,
2953 ie->indirect_info->by_ref,
2954 &from_global_constant);
2955 if (t
2956 && !from_global_constant
2957 && !ie->indirect_info->guaranteed_unmodified)
2958 t = NULL_TREE;
2959 }
2960 }
2961 else if ((unsigned) param_index < known_csts.length ())
2962 t = known_csts[param_index];
2963
2964 if (t
2965 && TREE_CODE (t) == ADDR_EXPR
2966 && TREE_CODE (TREE_OPERAND (t, 0)) == FUNCTION_DECL)
2967 return TREE_OPERAND (t, 0);
2968 else
2969 return NULL_TREE;
2970 }
2971
2972 if (!opt_for_fn (ie->caller->decl, flag_devirtualize))
2973 return NULL_TREE;
2974
2975 gcc_assert (!ie->indirect_info->agg_contents);
2976 anc_offset = ie->indirect_info->offset;
2977
2978 t = NULL;
2979
  /* Try to work out the value of the virtual table pointer in
     replacements.  */
2981 if (!t && agg_reps && !ie->indirect_info->by_ref)
2982 {
2983 while (agg_reps)
2984 {
2985 if (agg_reps->index == param_index
2986 && agg_reps->offset == ie->indirect_info->offset
2987 && agg_reps->by_ref)
2988 {
2989 t = agg_reps->value;
2990 break;
2991 }
2992 agg_reps = agg_reps->next;
2993 }
2994 }
2995
  /* Try to work out the value of the virtual table pointer in known
     aggregate values.  */
2998 if (!t && known_aggs.length () > (unsigned int) param_index
2999 && !ie->indirect_info->by_ref)
3000 {
3001 struct ipa_agg_value_set *agg = &known_aggs[param_index];
3002 t = ipa_find_agg_cst_for_param (agg,
3003 (unsigned) param_index
3004 < known_csts.length ()
3005 ? known_csts[param_index] : NULL,
3006 ie->indirect_info->offset, true);
3007 }
3008
  /* If we found the virtual table pointer, look up the target.  */
3010 if (t)
3011 {
3012 tree vtable;
3013 unsigned HOST_WIDE_INT offset;
3014 if (vtable_pointer_value_to_vtable (t, &vtable, &offset))
3015 {
3016 bool can_refer;
3017 target = gimple_get_virt_method_for_vtable (ie->indirect_info->otr_token,
3018 vtable, offset, &can_refer);
3019 if (can_refer)
3020 {
3021 if (!target
3022 || fndecl_built_in_p (target, BUILT_IN_UNREACHABLE)
3023 || !possible_polymorphic_call_target_p
3024 (ie, cgraph_node::get (target)))
3025 {
3026 /* Do not speculate builtin_unreachable, it is stupid! */
3027 if (ie->indirect_info->vptr_changed)
3028 return NULL;
3029 target = ipa_impossible_devirt_target (ie, target);
3030 }
3031 *speculative = ie->indirect_info->vptr_changed;
3032 if (!*speculative)
3033 return target;
3034 }
3035 }
3036 }
3037
  /* Do we know the constant value of the pointer?  */
3039 if (!t && (unsigned) param_index < known_csts.length ())
3040 t = known_csts[param_index];
3041
3042 gcc_checking_assert (!t || TREE_CODE (t) != TREE_BINFO);
3043
3044 ipa_polymorphic_call_context context;
3045 if (known_contexts.length () > (unsigned int) param_index)
3046 {
3047 context = known_contexts[param_index];
3048 context.offset_by (anc_offset);
3049 if (ie->indirect_info->vptr_changed)
3050 context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
3051 ie->indirect_info->otr_type);
3052 if (t)
3053 {
3054 ipa_polymorphic_call_context ctx2 = ipa_polymorphic_call_context
3055 (t, ie->indirect_info->otr_type, anc_offset);
3056 if (!ctx2.useless_p ())
3057 context.combine_with (ctx2, ie->indirect_info->otr_type);
3058 }
3059 }
3060 else if (t)
3061 {
3062 context = ipa_polymorphic_call_context (t, ie->indirect_info->otr_type,
3063 anc_offset);
3064 if (ie->indirect_info->vptr_changed)
3065 context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
3066 ie->indirect_info->otr_type);
3067 }
3068 else
3069 return NULL_TREE;
3070
  vec <cgraph_node *> targets;
3072 bool final;
3073
3074 targets = possible_polymorphic_call_targets
3075 (ie->indirect_info->otr_type,
3076 ie->indirect_info->otr_token,
3077 context, &final);
3078 if (!final || targets.length () > 1)
3079 {
3080 struct cgraph_node *node;
3081 if (*speculative)
3082 return target;
3083 if (!opt_for_fn (ie->caller->decl, flag_devirtualize_speculatively)
3084 || ie->speculative || !ie->maybe_hot_p ())
3085 return NULL;
3086 node = try_speculative_devirtualization (ie->indirect_info->otr_type,
3087 ie->indirect_info->otr_token,
3088 context);
3089 if (node)
3090 {
3091 *speculative = true;
3092 target = node->decl;
3093 }
3094 else
3095 return NULL;
3096 }
3097 else
3098 {
3099 *speculative = false;
3100 if (targets.length () == 1)
3101 target = targets[0]->decl;
3102 else
3103 target = ipa_impossible_devirt_target (ie, NULL_TREE);
3104 }
3105
3106 if (target && !possible_polymorphic_call_target_p (ie,
3107 cgraph_node::get (target)))
3108 {
3109 if (*speculative)
3110 return NULL;
3111 target = ipa_impossible_devirt_target (ie, target);
3112 }
3113
3114 return target;
3115 }
3116
3117
/* If an indirect edge IE can be turned into a direct one based on KNOWN_CSTS,
   KNOWN_CONTEXTS (which can be vNULL) or KNOWN_AGGS (which also can be
   vNULL), return the destination.  */
3121
3122 tree
3123 ipa_get_indirect_edge_target (struct cgraph_edge *ie,
3124 vec<tree> known_csts,
3125 vec<ipa_polymorphic_call_context> known_contexts,
3126 vec<ipa_agg_value_set> known_aggs,
3127 bool *speculative)
3128 {
3129 return ipa_get_indirect_edge_target_1 (ie, known_csts, known_contexts,
3130 known_aggs, NULL, speculative);
3131 }
3132
/* Calculate devirtualization time bonus for NODE, assuming we know KNOWN_CSTS,
   KNOWN_CONTEXTS and KNOWN_AGGS.  */
3135
3136 static int
3137 devirtualization_time_bonus (struct cgraph_node *node,
3138 vec<tree> known_csts,
3139 vec<ipa_polymorphic_call_context> known_contexts,
3140 vec<ipa_agg_value_set> known_aggs)
3141 {
3142 struct cgraph_edge *ie;
3143 int res = 0;
3144
3145 for (ie = node->indirect_calls; ie; ie = ie->next_callee)
3146 {
3147 struct cgraph_node *callee;
3148 class ipa_fn_summary *isummary;
3149 enum availability avail;
3150 tree target;
3151 bool speculative;
3152
3153 target = ipa_get_indirect_edge_target (ie, known_csts, known_contexts,
3154 known_aggs, &speculative);
3155 if (!target)
3156 continue;
3157
3158 /* Only bare minimum benefit for clearly un-inlineable targets. */
3159 res += 1;
3160 callee = cgraph_node::get (target);
3161 if (!callee || !callee->definition)
3162 continue;
3163 callee = callee->function_symbol (&avail);
3164 if (avail < AVAIL_AVAILABLE)
3165 continue;
3166 isummary = ipa_fn_summaries->get (callee);
3167 if (!isummary || !isummary->inlinable)
3168 continue;
3169
3170 int size = ipa_size_summaries->get (callee)->size;
      /* FIXME: The values below need reconsidering and perhaps also
	 integrating into the cost metrics, at least in some very basic
	 way.  */
3173 int max_inline_insns_auto
3174 = opt_for_fn (callee->decl, param_max_inline_insns_auto);
3175 if (size <= max_inline_insns_auto / 4)
3176 res += 31 / ((int)speculative + 1);
3177 else if (size <= max_inline_insns_auto / 2)
3178 res += 15 / ((int)speculative + 1);
3179 else if (size <= max_inline_insns_auto
3180 || DECL_DECLARED_INLINE_P (callee->decl))
3181 res += 7 / ((int)speculative + 1);
3182 }
3183
3184 return res;
3185 }
3186
3187 /* Return time bonus incurred because of HINTS. */
3188
3189 static int
3190 hint_time_bonus (cgraph_node *node, ipa_hints hints)
3191 {
3192 int result = 0;
3193 if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride))
3194 result += opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3195 return result;
3196 }
3197
/* If there is a reason to penalize the function described by NODE and INFO in
   the cloning goodness evaluation, do so and return the adjusted
   EVALUATION.  */
3200
3201 static inline int64_t
3202 incorporate_penalties (cgraph_node *node, ipa_node_params *info,
3203 int64_t evaluation)
3204 {
3205 if (info->node_within_scc && !info->node_is_self_scc)
3206 evaluation = (evaluation
3207 * (100 - opt_for_fn (node->decl,
3208 param_ipa_cp_recursion_penalty))) / 100;
3209
3210 if (info->node_calling_single_call)
3211 evaluation = (evaluation
3212 * (100 - opt_for_fn (node->decl,
3213 param_ipa_cp_single_call_penalty)))
3214 / 100;
3215
3216 return evaluation;
3217 }
3218
/* Return true if cloning NODE is a good idea, given the estimated TIME_BENEFIT
   and SIZE_COST and with the sum of frequencies of incoming edges to the
   potential new clone in FREQ_SUM and their profile counts in COUNT_SUM.  */
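/* For instance (hypothetical numbers), with TIME_BENEFIT 100, FREQ_SUM 300
   and SIZE_COST 10, the evaluation in the profile-less path below is
   100 * 300 / 10 = 3000, which, after the SCC and single-call penalties are
   applied, must reach param_ipa_cp_eval_threshold for the cloning to be
   deemed worthwhile.  */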
3222
3223 static bool
3224 good_cloning_opportunity_p (struct cgraph_node *node, int time_benefit,
3225 int freq_sum, profile_count count_sum, int size_cost)
3226 {
3227 if (time_benefit == 0
3228 || !opt_for_fn (node->decl, flag_ipa_cp_clone)
3229 || node->optimize_for_size_p ())
3230 return false;
3231
3232 gcc_assert (size_cost > 0);
3233
3234 class ipa_node_params *info = IPA_NODE_REF (node);
3235 int eval_threshold = opt_for_fn (node->decl, param_ipa_cp_eval_threshold);
3236 if (max_count > profile_count::zero ())
3237 {
3238 int factor = RDIV (count_sum.probability_in
3239 (max_count).to_reg_br_prob_base ()
3240 * 1000, REG_BR_PROB_BASE);
3241 int64_t evaluation = (((int64_t) time_benefit * factor)
3242 / size_cost);
3243 evaluation = incorporate_penalties (node, info, evaluation);
3244
3245 if (dump_file && (dump_flags & TDF_DETAILS))
3246 {
3247 fprintf (dump_file, " good_cloning_opportunity_p (time: %i, "
3248 "size: %i, count_sum: ", time_benefit, size_cost);
3249 count_sum.dump (dump_file);
3250 fprintf (dump_file, "%s%s) -> evaluation: " "%" PRId64
3251 ", threshold: %i\n",
3252 info->node_within_scc
3253 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
3254 info->node_calling_single_call ? ", single_call" : "",
3255 evaluation, eval_threshold);
3256 }
3257
3258 return evaluation >= eval_threshold;
3259 }
3260 else
3261 {
3262 int64_t evaluation = (((int64_t) time_benefit * freq_sum)
3263 / size_cost);
3264 evaluation = incorporate_penalties (node, info, evaluation);
3265
3266 if (dump_file && (dump_flags & TDF_DETAILS))
3267 fprintf (dump_file, " good_cloning_opportunity_p (time: %i, "
3268 "size: %i, freq_sum: %i%s%s) -> evaluation: "
3269 "%" PRId64 ", threshold: %i\n",
3270 time_benefit, size_cost, freq_sum,
3271 info->node_within_scc
3272 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
3273 info->node_calling_single_call ? ", single_call" : "",
3274 evaluation, eval_threshold);
3275
3276 return evaluation >= eval_threshold;
3277 }
3278 }
3279
/* Return all context independent values from aggregate lattices in PLATS in a
   vector.  Return an empty vector if there are none.  */
3282
3283 static vec<ipa_agg_value>
3284 context_independent_aggregate_values (class ipcp_param_lattices *plats)
3285 {
3286 vec<ipa_agg_value> res = vNULL;
3287
3288 if (plats->aggs_bottom
3289 || plats->aggs_contain_variable
3290 || plats->aggs_count == 0)
3291 return vNULL;
3292
3293 for (struct ipcp_agg_lattice *aglat = plats->aggs;
3294 aglat;
3295 aglat = aglat->next)
3296 if (aglat->is_single_const ())
3297 {
3298 struct ipa_agg_value item;
3299 item.offset = aglat->offset;
3300 item.value = aglat->values->value;
3301 res.safe_push (item);
3302 }
3303 return res;
3304 }
3305
/* Allocate KNOWN_CSTS, KNOWN_CONTEXTS and, if non-NULL, KNOWN_AGGS and
   populate them with values of parameters that are known independent of the
   context.  INFO describes the function.  If REMOVABLE_PARAMS_COST is
   non-NULL, the movement cost of all removable parameters will be stored in
   it.  Return true if at least one context-independent scalar constant or
   known aggregate part was found.  */
3311
3312 static bool
3313 gather_context_independent_values (class ipa_node_params *info,
3314 vec<tree> *known_csts,
3315 vec<ipa_polymorphic_call_context>
3316 *known_contexts,
3317 vec<ipa_agg_value_set> *known_aggs,
3318 int *removable_params_cost)
3319 {
3320 int i, count = ipa_get_param_count (info);
3321 bool ret = false;
3322
3323 known_csts->create (0);
3324 known_contexts->create (0);
3325 known_csts->safe_grow_cleared (count);
3326 known_contexts->safe_grow_cleared (count);
3327 if (known_aggs)
3328 {
3329 known_aggs->create (0);
3330 known_aggs->safe_grow_cleared (count);
3331 }
3332
3333 if (removable_params_cost)
3334 *removable_params_cost = 0;
3335
3336 for (i = 0; i < count; i++)
3337 {
3338 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3339 ipcp_lattice<tree> *lat = &plats->itself;
3340
3341 if (lat->is_single_const ())
3342 {
3343 ipcp_value<tree> *val = lat->values;
3344 gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
3345 (*known_csts)[i] = val->value;
3346 if (removable_params_cost)
3347 *removable_params_cost
3348 += estimate_move_cost (TREE_TYPE (val->value), false);
3349 ret = true;
3350 }
3351 else if (removable_params_cost
3352 && !ipa_is_param_used (info, i))
3353 *removable_params_cost
3354 += ipa_get_param_move_cost (info, i);
3355
3356 if (!ipa_is_param_used (info, i))
3357 continue;
3358
3359 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
      /* Do not account a known context as a reason for cloning by itself;
	 instead we will see whether it permits devirtualization.  */
3362 if (ctxlat->is_single_const ())
3363 (*known_contexts)[i] = ctxlat->values->value;
3364
3365 if (known_aggs)
3366 {
3367 vec<ipa_agg_value> agg_items;
3368 struct ipa_agg_value_set *agg;
3369
3370 agg_items = context_independent_aggregate_values (plats);
3371 agg = &(*known_aggs)[i];
3372 agg->items = agg_items;
3373 agg->by_ref = plats->aggs_by_ref;
3374 ret |= !agg_items.is_empty ();
3375 }
3376 }
3377
3378 return ret;
3379 }
3380
/* Perform time and size measurement of NODE with the context given in
   KNOWN_CSTS, KNOWN_CONTEXTS and KNOWN_AGGS, calculate the benefit and cost,
   adding REMOVABLE_PARAMS_COST of all context-independent removable
   parameters and EST_MOVE_COST of the estimated movement of the considered
   parameter, and store the results into VAL.  */
3386
3387 static void
3388 perform_estimation_of_a_value (cgraph_node *node, vec<tree> known_csts,
3389 vec<ipa_polymorphic_call_context> known_contexts,
3390 vec<ipa_agg_value_set> known_aggs,
3391 int removable_params_cost,
3392 int est_move_cost, ipcp_value_base *val)
3393 {
3394 int size, time_benefit;
3395 sreal time, base_time;
3396 ipa_hints hints;
3397
3398 estimate_ipcp_clone_size_and_time (node, known_csts, known_contexts,
3399 known_aggs, &size, &time,
3400 &base_time, &hints);
3401 base_time -= time;
3402 if (base_time > 65535)
3403 base_time = 65535;
3404
3405 /* Extern inline functions have no cloning local time benefits because they
3406 will be inlined anyway. The only reason to clone them is if it enables
3407 optimization in any of the functions they call. */
3408 if (DECL_EXTERNAL (node->decl) && DECL_DECLARED_INLINE_P (node->decl))
3409 time_benefit = 0;
3410 else
3411 time_benefit = base_time.to_int ()
3412 + devirtualization_time_bonus (node, known_csts, known_contexts,
3413 known_aggs)
3414 + hint_time_bonus (node, hints)
3415 + removable_params_cost + est_move_cost;
3416
  gcc_checking_assert (size >= 0);
  /* The inliner-heuristics based estimates may think that in certain
     contexts some functions do not have any size at all but we want
     all specializations to have at least a tiny cost, if only to avoid
     division by zero.  */
  if (size == 0)
    size = 1;
3424
3425 val->local_time_benefit = time_benefit;
3426 val->local_size_cost = size;
3427 }
3428
/* Get the overall limit of unit growth based on parameters of NODE and the
   original overall unit size.  It does not really make sense to mix functions
   with different overall growth limits, but it is possible and if it happens,
   we do not want to select one limit at random.  */
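/* For example (hypothetical parameter values), with orig_overall_size 8000,
   param_large_unit_insns 10000 and param_ipa_cp_unit_growth 10, the base is
   first raised to 10000 and the returned limit is
   10000 + 10000 * 10 / 100 + 1 = 11001.  */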
3433
3434 static long
3435 get_max_overall_size (cgraph_node *node)
3436 {
3437 long max_new_size = orig_overall_size;
3438 long large_unit = opt_for_fn (node->decl, param_large_unit_insns);
3439 if (max_new_size < large_unit)
3440 max_new_size = large_unit;
3441 int unit_growth = opt_for_fn (node->decl, param_ipa_cp_unit_growth);
3442 max_new_size += max_new_size * unit_growth / 100 + 1;
3443 return max_new_size;
3444 }
3445
3446 /* Iterate over known values of parameters of NODE and estimate the local
3447 effects in terms of time and size they have. */
3448
3449 static void
3450 estimate_local_effects (struct cgraph_node *node)
3451 {
3452 class ipa_node_params *info = IPA_NODE_REF (node);
3453 int i, count = ipa_get_param_count (info);
3454 vec<tree> known_csts;
3455 vec<ipa_polymorphic_call_context> known_contexts;
3456 vec<ipa_agg_value_set> known_aggs;
3457 bool always_const;
3458 int removable_params_cost;
3459
3460 if (!count || !ipcp_versionable_function_p (node))
3461 return;
3462
3463 if (dump_file && (dump_flags & TDF_DETAILS))
3464 fprintf (dump_file, "\nEstimating effects for %s.\n", node->dump_name ());
3465
3466 always_const = gather_context_independent_values (info, &known_csts,
3467 &known_contexts, &known_aggs,
3468 &removable_params_cost);
3469 int devirt_bonus = devirtualization_time_bonus (node, known_csts,
3470 known_contexts, known_aggs);
3471 if (always_const || devirt_bonus
3472 || (removable_params_cost && node->can_change_signature))
3473 {
3474 struct caller_statistics stats;
3475 ipa_hints hints;
3476 sreal time, base_time;
3477 int size;
3478
3479 init_caller_stats (&stats);
3480 node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
3481 false);
3482 estimate_ipcp_clone_size_and_time (node, known_csts, known_contexts,
3483 known_aggs, &size, &time,
3484 &base_time, &hints);
3485 time -= devirt_bonus;
3486 time -= hint_time_bonus (node, hints);
3487 time -= removable_params_cost;
3488 size -= stats.n_calls * removable_params_cost;
3489
3490 if (dump_file)
3491 fprintf (dump_file, " - context independent values, size: %i, "
3492 "time_benefit: %f\n", size, (base_time - time).to_double ());
3493
3494 if (size <= 0 || node->local)
3495 {
3496 info->do_clone_for_all_contexts = true;
3497
3498 if (dump_file)
3499 fprintf (dump_file, " Decided to specialize for all "
3500 "known contexts, code not going to grow.\n");
3501 }
3502 else if (good_cloning_opportunity_p (node,
3503 MIN ((base_time - time).to_int (),
3504 65536),
3505 stats.freq_sum, stats.count_sum,
3506 size))
3507 {
3508 if (size + overall_size <= get_max_overall_size (node))
3509 {
3510 info->do_clone_for_all_contexts = true;
3511 overall_size += size;
3512
3513 if (dump_file)
3514 fprintf (dump_file, " Decided to specialize for all "
3515 "known contexts, growth deemed beneficial.\n");
3516 }
3517 else if (dump_file && (dump_flags & TDF_DETAILS))
3518 fprintf (dump_file, " Not cloning for all contexts because "
3519 "maximum unit size would be reached with %li.\n",
3520 size + overall_size);
3521 }
3522 else if (dump_file && (dump_flags & TDF_DETAILS))
3523 fprintf (dump_file, " Not cloning for all contexts because "
3524 "!good_cloning_opportunity_p.\n");
3525
3526 }
3527
3528 for (i = 0; i < count; i++)
3529 {
3530 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3531 ipcp_lattice<tree> *lat = &plats->itself;
3532 ipcp_value<tree> *val;
3533
3534 if (lat->bottom
3535 || !lat->values
3536 || known_csts[i])
3537 continue;
3538
3539 for (val = lat->values; val; val = val->next)
3540 {
3541 gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
3542 known_csts[i] = val->value;
3543
3544 int emc = estimate_move_cost (TREE_TYPE (val->value), true);
3545 perform_estimation_of_a_value (node, known_csts, known_contexts,
3546 known_aggs,
3547 removable_params_cost, emc, val);
3548
3549 if (dump_file && (dump_flags & TDF_DETAILS))
3550 {
3551 fprintf (dump_file, " - estimates for value ");
3552 print_ipcp_constant_value (dump_file, val->value);
3553 fprintf (dump_file, " for ");
3554 ipa_dump_param (dump_file, info, i);
3555 fprintf (dump_file, ": time_benefit: %i, size: %i\n",
3556 val->local_time_benefit, val->local_size_cost);
3557 }
3558 }
3559 known_csts[i] = NULL_TREE;
3560 }
3561
3562 for (i = 0; i < count; i++)
3563 {
3564 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3565
3566 if (!plats->virt_call)
3567 continue;
3568
3569 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3570 ipcp_value<ipa_polymorphic_call_context> *val;
3571
3572 if (ctxlat->bottom
3573 || !ctxlat->values
3574 || !known_contexts[i].useless_p ())
3575 continue;
3576
3577 for (val = ctxlat->values; val; val = val->next)
3578 {
3579 known_contexts[i] = val->value;
3580 perform_estimation_of_a_value (node, known_csts, known_contexts,
3581 known_aggs,
3582 removable_params_cost, 0, val);
3583
3584 if (dump_file && (dump_flags & TDF_DETAILS))
3585 {
3586 fprintf (dump_file, " - estimates for polymorphic context ");
3587 print_ipcp_constant_value (dump_file, val->value);
3588 fprintf (dump_file, " for ");
3589 ipa_dump_param (dump_file, info, i);
3590 fprintf (dump_file, ": time_benefit: %i, size: %i\n",
3591 val->local_time_benefit, val->local_size_cost);
3592 }
3593 }
3594 known_contexts[i] = ipa_polymorphic_call_context ();
3595 }
3596
3597 for (i = 0; i < count; i++)
3598 {
3599 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3600 struct ipa_agg_value_set *agg;
3601 struct ipcp_agg_lattice *aglat;
3602
3603 if (plats->aggs_bottom || !plats->aggs)
3604 continue;
3605
3606 agg = &known_aggs[i];
3607 for (aglat = plats->aggs; aglat; aglat = aglat->next)
3608 {
3609 ipcp_value<tree> *val;
3610 if (aglat->bottom || !aglat->values
3611 /* If the following is true, the one value is in known_aggs. */
3612 || (!plats->aggs_contain_variable
3613 && aglat->is_single_const ()))
3614 continue;
3615
3616 for (val = aglat->values; val; val = val->next)
3617 {
3618 struct ipa_agg_value item;
3619
3620 item.offset = aglat->offset;
3621 item.value = val->value;
3622 agg->items.safe_push (item);
3623
3624 perform_estimation_of_a_value (node, known_csts, known_contexts,
3625 known_aggs,
3626 removable_params_cost, 0, val);
3627
3628 if (dump_file && (dump_flags & TDF_DETAILS))
3629 {
3630 fprintf (dump_file, " - estimates for value ");
3631 print_ipcp_constant_value (dump_file, val->value);
3632 fprintf (dump_file, " for ");
3633 ipa_dump_param (dump_file, info, i);
3634 fprintf (dump_file, "[%soffset: " HOST_WIDE_INT_PRINT_DEC
3635 "]: time_benefit: %i, size: %i\n",
3636 plats->aggs_by_ref ? "ref " : "",
3637 aglat->offset,
3638 val->local_time_benefit, val->local_size_cost);
3639 }
3640
3641 agg->items.pop ();
3642 }
3643 }
3644 }
3645
3646 known_csts.release ();
3647 known_contexts.release ();
3648 ipa_release_agg_values (known_aggs);
3649 }
3650
3651
3652 /* Add value CUR_VAL and all yet-unsorted values it is dependent on to the
3653 topological sort of values. */
3654
3655 template <typename valtype>
3656 void
3657 value_topo_info<valtype>::add_val (ipcp_value<valtype> *cur_val)
3658 {
3659 ipcp_value_source<valtype> *src;
3660
3661 if (cur_val->dfs)
3662 return;
3663
3664 dfs_counter++;
3665 cur_val->dfs = dfs_counter;
3666 cur_val->low_link = dfs_counter;
3667
3668 cur_val->topo_next = stack;
3669 stack = cur_val;
3670 cur_val->on_stack = true;
3671
3672 for (src = cur_val->sources; src; src = src->next)
3673 if (src->val)
3674 {
3675 if (src->val->dfs == 0)
3676 {
3677 add_val (src->val);
3678 if (src->val->low_link < cur_val->low_link)
3679 cur_val->low_link = src->val->low_link;
3680 }
3681 else if (src->val->on_stack
3682 && src->val->dfs < cur_val->low_link)
3683 cur_val->low_link = src->val->dfs;
3684 }
3685
3686 if (cur_val->dfs == cur_val->low_link)
3687 {
3688 ipcp_value<valtype> *v, *scc_list = NULL;
3689
3690 do
3691 {
3692 v = stack;
3693 stack = v->topo_next;
3694 v->on_stack = false;
3695
3696 v->scc_next = scc_list;
3697 scc_list = v;
3698 }
3699 while (v != cur_val);
3700
3701 cur_val->topo_next = values_topo;
3702 values_topo = cur_val;
3703 }
3704 }
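
/* As an illustrative sketch (function names assumed) of when values form a
   non-trivial SCC:

     void bar (int x) { if (rarely) foo (x); }
     void foo (int a) { use (a); bar (4); }

   The value 4 for A of foo is a source of the value 4 for X of bar and vice
   versa, because each feeds the other through a pass-through jump function,
   so add_val places both into one strongly connected component.  */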
3705
3706 /* Add all values in lattices associated with NODE to the topological sort if
3707 they are not there yet. */
3708
3709 static void
3710 add_all_node_vals_to_toposort (cgraph_node *node, ipa_topo_info *topo)
3711 {
3712 class ipa_node_params *info = IPA_NODE_REF (node);
3713 int i, count = ipa_get_param_count (info);
3714
3715 for (i = 0; i < count; i++)
3716 {
3717 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3718 ipcp_lattice<tree> *lat = &plats->itself;
3719 struct ipcp_agg_lattice *aglat;
3720
3721 if (!lat->bottom)
3722 {
3723 ipcp_value<tree> *val;
3724 for (val = lat->values; val; val = val->next)
3725 topo->constants.add_val (val);
3726 }
3727
3728 if (!plats->aggs_bottom)
3729 for (aglat = plats->aggs; aglat; aglat = aglat->next)
3730 if (!aglat->bottom)
3731 {
3732 ipcp_value<tree> *val;
3733 for (val = aglat->values; val; val = val->next)
3734 topo->constants.add_val (val);
3735 }
3736
3737 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3738 if (!ctxlat->bottom)
3739 {
3740 ipcp_value<ipa_polymorphic_call_context> *ctxval;
3741 for (ctxval = ctxlat->values; ctxval; ctxval = ctxval->next)
3742 topo->contexts.add_val (ctxval);
3743 }
3744 }
3745 }
3746
/* One pass of constant propagation along the call graph edges, from callers
   to callees (requires topological ordering in TOPO), iterating over strongly
   connected components.  */
3750
3751 static void
3752 propagate_constants_topo (class ipa_topo_info *topo)
3753 {
3754 int i;
3755
3756 for (i = topo->nnodes - 1; i >= 0; i--)
3757 {
3758 unsigned j;
3759 struct cgraph_node *v, *node = topo->order[i];
3760 vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (node);
3761
3762 /* First, iteratively propagate within the strongly connected component
3763 until all lattices stabilize. */
3764 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3765 if (v->has_gimple_body_p ())
3766 {
3767 if (opt_for_fn (v->decl, flag_ipa_cp)
3768 && opt_for_fn (v->decl, optimize))
3769 push_node_to_stack (topo, v);
	  /* When V is not optimized, we cannot push it to the stack, but
	     still we need to set all its callees' lattices to bottom.  */
3772 else
3773 {
3774 for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
3775 propagate_constants_across_call (cs);
3776 }
3777 }
3778
3779 v = pop_node_from_stack (topo);
3780 while (v)
3781 {
3782 struct cgraph_edge *cs;
3783 class ipa_node_params *info = NULL;
3784 bool self_scc = true;
3785
3786 for (cs = v->callees; cs; cs = cs->next_callee)
3787 if (ipa_edge_within_scc (cs))
3788 {
3789 cgraph_node *callee = cs->callee->function_symbol ();
3790
3791 if (v != callee)
3792 self_scc = false;
3793
3794 if (!info)
3795 {
3796 info = IPA_NODE_REF (v);
3797 info->node_within_scc = true;
3798 }
3799
3800 if (propagate_constants_across_call (cs))
3801 push_node_to_stack (topo, callee);
3802 }
3803
3804 if (info)
3805 info->node_is_self_scc = self_scc;
3806
3807 v = pop_node_from_stack (topo);
3808 }
3809
      /* Afterwards, propagate along edges leading out of the SCC, calculate
	 the local effects of the discovered constants and add all valid
	 values to the topological sort.  */
3813 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3814 if (v->has_gimple_body_p ()
3815 && opt_for_fn (v->decl, flag_ipa_cp)
3816 && opt_for_fn (v->decl, optimize))
3817 {
3818 struct cgraph_edge *cs;
3819
3820 estimate_local_effects (v);
3821 add_all_node_vals_to_toposort (v, topo);
3822 for (cs = v->callees; cs; cs = cs->next_callee)
3823 if (!ipa_edge_within_scc (cs))
3824 propagate_constants_across_call (cs);
3825 }
3826 cycle_nodes.release ();
3827 }
3828 }
3829
3830
/* Return the sum of A and B if neither of them is bigger than INT_MAX/2,
   otherwise return the bigger one.  */
3833
3834 static int
3835 safe_add (int a, int b)
3836 {
3837 if (a > INT_MAX/2 || b > INT_MAX/2)
3838 return a > b ? a : b;
3839 else
3840 return a + b;
3841 }
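
/* For instance, safe_add (10, 20) yields 30, whereas
   safe_add (INT_MAX/2 + 1, 20) yields INT_MAX/2 + 1, so repeatedly
   accumulated benefit estimates saturate instead of overflowing.  */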
3842
3843
/* Propagate the estimated effects of individual values along the topological
   order from the dependent values to those they depend on.  */
3846
3847 template <typename valtype>
3848 void
3849 value_topo_info<valtype>::propagate_effects ()
3850 {
3851 ipcp_value<valtype> *base;
3852
3853 for (base = values_topo; base; base = base->topo_next)
3854 {
3855 ipcp_value_source<valtype> *src;
3856 ipcp_value<valtype> *val;
3857 int time = 0, size = 0;
3858
3859 for (val = base; val; val = val->scc_next)
3860 {
3861 time = safe_add (time,
3862 val->local_time_benefit + val->prop_time_benefit);
3863 size = safe_add (size, val->local_size_cost + val->prop_size_cost);
3864 }
3865
3866 for (val = base; val; val = val->scc_next)
3867 for (src = val->sources; src; src = src->next)
3868 if (src->val
3869 && src->cs->maybe_hot_p ())
3870 {
3871 src->val->prop_time_benefit = safe_add (time,
3872 src->val->prop_time_benefit);
3873 src->val->prop_size_cost = safe_add (size,
3874 src->val->prop_size_cost);
3875 }
3876 }
3877 }
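
/* For instance (a sketch with assumed functions): if cloning foo for value
   V1 of its parameter makes value V2 available in its callee bar, then V2's
   local time benefit and size cost are accumulated into the propagated
   effects of V1, so the decision whether to clone foo can take the potential
   savings in bar into account.  */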
3878
3879
3880 /* Propagate constants, polymorphic contexts and their effects from the
3881 summaries interprocedurally. */
3882
3883 static void
3884 ipcp_propagate_stage (class ipa_topo_info *topo)
3885 {
3886 struct cgraph_node *node;
3887
3888 if (dump_file)
3889 fprintf (dump_file, "\n Propagating constants:\n\n");
3890
3891 max_count = profile_count::uninitialized ();
3892
3893 FOR_EACH_DEFINED_FUNCTION (node)
3894 {
3895 if (node->has_gimple_body_p ()
3896 && opt_for_fn (node->decl, flag_ipa_cp)
3897 && opt_for_fn (node->decl, optimize))
3898 {
3899 class ipa_node_params *info = IPA_NODE_REF (node);
3900 determine_versionability (node, info);
3901 info->lattices = XCNEWVEC (class ipcp_param_lattices,
3902 ipa_get_param_count (info));
3903 initialize_node_lattices (node);
3904 }
3905 ipa_size_summary *s = ipa_size_summaries->get (node);
3906 if (node->definition && !node->alias && s != NULL)
3907 overall_size += s->self_size;
3908 max_count = max_count.max (node->count.ipa ());
3909 }
3910
3911 orig_overall_size = overall_size;
3912
3913 if (dump_file)
3914 fprintf (dump_file, "\noverall_size: %li\n", overall_size);
3915
3916 propagate_constants_topo (topo);
3917 if (flag_checking)
3918 ipcp_verify_propagated_values ();
3919 topo->constants.propagate_effects ();
3920 topo->contexts.propagate_effects ();
3921
3922 if (dump_file)
3923 {
3924 fprintf (dump_file, "\nIPA lattices after all propagation:\n");
3925 print_all_lattices (dump_file, (dump_flags & TDF_DETAILS), true);
3926 }
3927 }
3928
/* Discover indirect outgoing edges of NODE, which is a new clone with known
   KNOWN_CSTS, whose call targets have become known, and make those edges
   direct.  */
3931
3932 static void
3933 ipcp_discover_new_direct_edges (struct cgraph_node *node,
3934 vec<tree> known_csts,
3935 vec<ipa_polymorphic_call_context>
3936 known_contexts,
3937 struct ipa_agg_replacement_value *aggvals)
3938 {
3939 struct cgraph_edge *ie, *next_ie;
3940 bool found = false;
3941
3942 for (ie = node->indirect_calls; ie; ie = next_ie)
3943 {
3944 tree target;
3945 bool speculative;
3946
3947 next_ie = ie->next_callee;
3948 target = ipa_get_indirect_edge_target_1 (ie, known_csts, known_contexts,
3949 vNULL, aggvals, &speculative);
3950 if (target)
3951 {
3952 bool agg_contents = ie->indirect_info->agg_contents;
3953 bool polymorphic = ie->indirect_info->polymorphic;
3954 int param_index = ie->indirect_info->param_index;
3955 struct cgraph_edge *cs = ipa_make_edge_direct_to_target (ie, target,
3956 speculative);
3957 found = true;
3958
3959 if (cs && !agg_contents && !polymorphic)
3960 {
3961 class ipa_node_params *info = IPA_NODE_REF (node);
3962 int c = ipa_get_controlled_uses (info, param_index);
3963 if (c != IPA_UNDESCRIBED_USE)
3964 {
3965 struct ipa_ref *to_del;
3966
3967 c--;
3968 ipa_set_controlled_uses (info, param_index, c);
3969 if (dump_file && (dump_flags & TDF_DETAILS))
3970 fprintf (dump_file, " controlled uses count of param "
3971 "%i bumped down to %i\n", param_index, c);
3972 if (c == 0
3973 && (to_del = node->find_reference (cs->callee, NULL, 0)))
3974 {
3975 if (dump_file && (dump_flags & TDF_DETAILS))
3976 fprintf (dump_file, " and even removing its "
3977 "cloning-created reference\n");
3978 to_del->remove_reference ();
3979 }
3980 }
3981 }
3982 }
3983 }
  /* Turning indirect calls into direct ones will improve the overall
     summary.  */
3985 if (found)
3986 ipa_update_overall_fn_summary (node);
3987 }
3988
3989 class edge_clone_summary;
3990 static call_summary <edge_clone_summary *> *edge_clone_summaries = NULL;
3991
3992 /* Edge clone summary. */
3993
3994 class edge_clone_summary
3995 {
3996 public:
3997 /* Default constructor. */
3998 edge_clone_summary (): prev_clone (NULL), next_clone (NULL) {}
3999
4000 /* Default destructor. */
4001 ~edge_clone_summary ()
4002 {
4003 if (prev_clone)
4004 edge_clone_summaries->get (prev_clone)->next_clone = next_clone;
4005 if (next_clone)
4006 edge_clone_summaries->get (next_clone)->prev_clone = prev_clone;
4007 }
4008
4009 cgraph_edge *prev_clone;
4010 cgraph_edge *next_clone;
4011 };
4012
4013 class edge_clone_summary_t:
4014 public call_summary <edge_clone_summary *>
4015 {
4016 public:
4017 edge_clone_summary_t (symbol_table *symtab):
4018 call_summary <edge_clone_summary *> (symtab)
4019 {
4020 m_initialize_when_cloning = true;
4021 }
4022
4023 virtual void duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4024 edge_clone_summary *src_data,
4025 edge_clone_summary *dst_data);
4026 };
4027
4028 /* Edge duplication hook. */
4029
4030 void
4031 edge_clone_summary_t::duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4032 edge_clone_summary *src_data,
4033 edge_clone_summary *dst_data)
4034 {
4035 if (src_data->next_clone)
4036 edge_clone_summaries->get (src_data->next_clone)->prev_clone = dst_edge;
4037 dst_data->prev_clone = src_edge;
4038 dst_data->next_clone = src_data->next_clone;
4039 src_data->next_clone = dst_edge;
4040 }
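
/* For example, given a chain of edge clones E1 <-> E2, duplicating E1 into a
   new edge E1' links it in as E1 <-> E1' <-> E2, so the chain always lists
   every clone of the original edge exactly once.  */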
4041
/* Return true if CS calls DEST or its clone for all contexts.  When
   ALLOW_RECURSION_TO_CLONE is false, also return false for self-recursive
   edges from/to an all-context clone.  */
4045
4046 static bool
4047 calls_same_node_or_its_all_contexts_clone_p (cgraph_edge *cs, cgraph_node *dest,
4048 bool allow_recursion_to_clone)
4049 {
4050 enum availability availability;
4051 cgraph_node *callee = cs->callee->function_symbol (&availability);
4052
4053 if (availability <= AVAIL_INTERPOSABLE)
4054 return false;
4055 if (callee == dest)
4056 return true;
4057 if (!allow_recursion_to_clone && cs->caller == callee)
4058 return false;
4059
4060 class ipa_node_params *info = IPA_NODE_REF (callee);
4061 return info->is_all_contexts_clone && info->ipcp_orig_node == dest;
4062 }
4063
4064 /* Return true if edge CS does bring about the value described by SRC to
4065 DEST_VAL of node DEST or its clone for all contexts. */
4066
4067 static bool
4068 cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source<tree> *src,
4069 cgraph_node *dest, ipcp_value<tree> *dest_val)
4070 {
4071 class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
4072
4073 if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, !src->val)
4074 || caller_info->node_dead)
4075 return false;
4076
4077 if (!src->val)
4078 return true;
4079
4080 if (caller_info->ipcp_orig_node)
4081 {
4082 tree t;
4083 if (src->offset == -1)
4084 t = caller_info->known_csts[src->index];
4085 else
4086 t = get_clone_agg_value (cs->caller, src->offset, src->index);
4087 return (t != NULL_TREE
4088 && values_equal_for_ipcp_p (src->val->value, t));
4089 }
4090 else
4091 {
4092 if (src->val == dest_val)
4093 return true;
4094
4095 struct ipcp_agg_lattice *aglat;
4096 class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
4097 src->index);
4098 if (src->offset == -1)
4099 return (plats->itself.is_single_const ()
4100 && values_equal_for_ipcp_p (src->val->value,
4101 plats->itself.values->value));
4102 else
4103 {
4104 if (plats->aggs_bottom || plats->aggs_contain_variable)
4105 return false;
4106 for (aglat = plats->aggs; aglat; aglat = aglat->next)
4107 if (aglat->offset == src->offset)
4108 return (aglat->is_single_const ()
4109 && values_equal_for_ipcp_p (src->val->value,
4110 aglat->values->value));
4111 }
4112 return false;
4113 }
4114 }
4115
4116 /* Return true if edge CS does bring about the value described by SRC to
4117 DST_VAL of node DEST or its clone for all contexts. */
4118
4119 static bool
4120 cgraph_edge_brings_value_p (cgraph_edge *cs,
4121 ipcp_value_source<ipa_polymorphic_call_context> *src,
4122 cgraph_node *dest,
4123 ipcp_value<ipa_polymorphic_call_context> *)
4124 {
4125 class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
4126
4127 if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, true)
4128 || caller_info->node_dead)
4129 return false;
4130 if (!src->val)
4131 return true;
4132
4133 if (caller_info->ipcp_orig_node)
4134 return (caller_info->known_contexts.length () > (unsigned) src->index)
4135 && values_equal_for_ipcp_p (src->val->value,
4136 caller_info->known_contexts[src->index]);
4137
4138 class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
4139 src->index);
4140 return plats->ctxlat.is_single_const ()
4141 && values_equal_for_ipcp_p (src->val->value,
4142 plats->ctxlat.values->value);
4143 }
4144
4145 /* Get the next clone in the linked list of clones of an edge. */
4146
4147 static inline struct cgraph_edge *
4148 get_next_cgraph_edge_clone (struct cgraph_edge *cs)
4149 {
4150 edge_clone_summary *s = edge_clone_summaries->get (cs);
4151 return s != NULL ? s->next_clone : NULL;
4152 }
4153
/* Given VAL that is intended for DEST, iterate over all its sources and if
   any of them is viable and hot, return true.  In that case, for those that
   still hold, add their edge frequencies to *FREQ_SUM, their counts to
   *COUNT_SUM and their number to *CALLER_COUNT.  */
4158
4159 template <typename valtype>
4160 static bool
4161 get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
4162 int *freq_sum,
4163 profile_count *count_sum, int *caller_count)
4164 {
4165 ipcp_value_source<valtype> *src;
4166 int freq = 0, count = 0;
4167 profile_count cnt = profile_count::zero ();
4168 bool hot = false;
4169 bool non_self_recursive = false;
4170
4171 for (src = val->sources; src; src = src->next)
4172 {
4173 struct cgraph_edge *cs = src->cs;
4174 while (cs)
4175 {
4176 if (cgraph_edge_brings_value_p (cs, src, dest, val))
4177 {
4178 count++;
4179 freq += cs->frequency ();
4180 if (cs->count.ipa ().initialized_p ())
4181 cnt += cs->count.ipa ();
4182 hot |= cs->maybe_hot_p ();
4183 if (cs->caller != dest)
4184 non_self_recursive = true;
4185 }
4186 cs = get_next_cgraph_edge_clone (cs);
4187 }
4188 }
4189
4190 /* If the only edges bringing a value are self-recursive ones, do not bother
4191 evaluating it. */
4192 if (!non_self_recursive)
4193 return false;
4194
4195 *freq_sum = freq;
4196 *count_sum = cnt;
4197 *caller_count = count;
4198
4199 if (!hot && IPA_NODE_REF (dest)->node_within_scc)
4200 {
4201 struct cgraph_edge *cs;
4202
      /* A cold non-SCC source edge could trigger hot recursive execution of
	 the function.  Consider the case as hot and rely on the following
	 cost model computation to further select the right one.  */
4206 for (cs = dest->callers; cs; cs = cs->next_caller)
4207 if (cs->caller == dest && cs->maybe_hot_p ())
4208 return true;
4209 }
4210
4211 return hot;
4212 }
4213
/* Given a NODE, and a set of its CALLERS, try to adjust the order of the
   callers so that a non-self-recursive caller is the first element.  Thus,
   we can simplify intersecting operations on values that arrive from all of
   these callers, especially when there exists a self-recursive call.  Return
   true if this kind of adjustment is possible.  */
4219
4220 static bool
4221 adjust_callers_for_value_intersection (vec<cgraph_edge *> callers,
4222 cgraph_node *node)
4223 {
4224 for (unsigned i = 0; i < callers.length (); i++)
4225 {
4226 cgraph_edge *cs = callers[i];
4227
4228 if (cs->caller != node)
4229 {
4230 if (i > 0)
4231 {
4232 callers[i] = callers[0];
4233 callers[0] = cs;
4234 }
4235 return true;
4236 }
4237 }
4238 return false;
4239 }
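
/* For instance, if CALLERS for a node N is {N->N, M->N}, the two edges are
   swapped so that the non-self-recursive edge M->N comes first and the
   intersection of incoming values can start from what it brings.  */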
4240
4241 /* Return a vector of incoming edges that do bring value VAL to node DEST. It
4242 is assumed their number is known and equal to CALLER_COUNT. */
4243
4244 template <typename valtype>
4245 static vec<cgraph_edge *>
4246 gather_edges_for_value (ipcp_value<valtype> *val, cgraph_node *dest,
4247 int caller_count)
4248 {
4249 ipcp_value_source<valtype> *src;
4250 vec<cgraph_edge *> ret;
4251
4252 ret.create (caller_count);
4253 for (src = val->sources; src; src = src->next)
4254 {
4255 struct cgraph_edge *cs = src->cs;
4256 while (cs)
4257 {
4258 if (cgraph_edge_brings_value_p (cs, src, dest, val))
4259 ret.quick_push (cs);
4260 cs = get_next_cgraph_edge_clone (cs);
4261 }
4262 }
4263
4264 if (caller_count > 1)
4265 adjust_callers_for_value_intersection (ret, dest);
4266
4267 return ret;
4268 }
4269
/* Construct a replacement map for a known VALUE for the formal parameter
   number PARM_NUM.  Return it or NULL if for some reason it cannot be
   created.  */
4272
4273 static struct ipa_replace_map *
4274 get_replacement_map (class ipa_node_params *info, tree value, int parm_num)
4275 {
4276 struct ipa_replace_map *replace_map;
4277
4278 replace_map = ggc_alloc<ipa_replace_map> ();
4279 if (dump_file)
4280 {
4281 fprintf (dump_file, " replacing ");
4282 ipa_dump_param (dump_file, info, parm_num);
4283
4284 fprintf (dump_file, " with const ");
4285 print_generic_expr (dump_file, value);
4286 fprintf (dump_file, "\n");
4287 }
4288 replace_map->parm_num = parm_num;
4289 replace_map->new_tree = value;
4290 return replace_map;
4291 }
4292
/* Dump new profiling counts of ORIG_NODE and NEW_NODE.  */
4294
4295 static void
4296 dump_profile_updates (struct cgraph_node *orig_node,
4297 struct cgraph_node *new_node)
4298 {
4299 struct cgraph_edge *cs;
4300
4301 fprintf (dump_file, " setting count of the specialized node to ");
4302 new_node->count.dump (dump_file);
4303 fprintf (dump_file, "\n");
4304 for (cs = new_node->callees; cs; cs = cs->next_callee)
4305 {
4306 fprintf (dump_file, " edge to %s has count ",
4307 cs->callee->dump_name ());
4308 cs->count.dump (dump_file);
4309 fprintf (dump_file, "\n");
4310 }
4311
4312 fprintf (dump_file, " setting count of the original node to ");
4313 orig_node->count.dump (dump_file);
4314 fprintf (dump_file, "\n");
4315 for (cs = orig_node->callees; cs; cs = cs->next_callee)
4316 {
4317 fprintf (dump_file, " edge to %s is left with ",
4318 cs->callee->dump_name ());
4319 cs->count.dump (dump_file);
4320 fprintf (dump_file, "\n");
4321 }
4322 }
4323
4324 /* After a specialized NEW_NODE version of ORIG_NODE has been created, update
4325 their profile information to reflect this. */
4326
4327 static void
4328 update_profiling_info (struct cgraph_node *orig_node,
4329 struct cgraph_node *new_node)
4330 {
4331 struct cgraph_edge *cs;
4332 struct caller_statistics stats;
4333 profile_count new_sum, orig_sum;
4334 profile_count remainder, orig_node_count = orig_node->count;
4335 profile_count orig_new_node_count = new_node->count;
4336
4337 if (!(orig_node_count.ipa () > profile_count::zero ()))
4338 return;
4339
4340 init_caller_stats (&stats);
4341 orig_node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
4342 false);
4343 orig_sum = stats.count_sum;
4344 init_caller_stats (&stats);
4345 new_node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
4346 false);
4347 new_sum = stats.count_sum;
4348
4349 if (orig_node_count < orig_sum + new_sum)
4350 {
4351 if (dump_file)
4352 {
4353 fprintf (dump_file, " Problem: node %s has too low count ",
4354 orig_node->dump_name ());
4355 orig_node_count.dump (dump_file);
4356 fprintf (dump_file, "while the sum of incoming count is ");
4357 (orig_sum + new_sum).dump (dump_file);
4358 fprintf (dump_file, "\n");
4359 }
4360
4361 orig_node_count = (orig_sum + new_sum).apply_scale (12, 10);
4362 if (dump_file)
4363 {
4364 fprintf (dump_file, " proceeding by pretending it was ");
4365 orig_node_count.dump (dump_file);
4366 fprintf (dump_file, "\n");
4367 }
4368 }
4369
4370 remainder = orig_node_count.combine_with_ipa_count (orig_node_count.ipa ()
4371 - new_sum.ipa ());
4372
  /* With a partial training run we do not want to assume that the original's
     count is zero whenever we redirect all executed edges to the clone.
     Simply drop the profile to a local one in this case.  */
4376 if (remainder.ipa_p () && !remainder.ipa ().nonzero_p ()
4377 && orig_node->count.ipa_p () && orig_node->count.ipa ().nonzero_p ()
4378 && flag_profile_partial_training)
4379 remainder = remainder.guessed_local ();
4380
4381 new_sum = orig_node_count.combine_with_ipa_count (new_sum);
4382 new_node->count = new_sum;
4383 orig_node->count = remainder;
4384
4385 profile_count::adjust_for_ipa_scaling (&new_sum, &orig_new_node_count);
4386 for (cs = new_node->callees; cs; cs = cs->next_callee)
4387 cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
4388 for (cs = new_node->indirect_calls; cs; cs = cs->next_callee)
4389 cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
4390
4391 profile_count::adjust_for_ipa_scaling (&remainder, &orig_node_count);
4392 for (cs = orig_node->callees; cs; cs = cs->next_callee)
4393 cs->count = cs->count.apply_scale (remainder, orig_node_count);
4394 for (cs = orig_node->indirect_calls; cs; cs = cs->next_callee)
4395 cs->count = cs->count.apply_scale (remainder, orig_node_count);
4396
4397 if (dump_file)
4398 dump_profile_updates (orig_node, new_node);
4399 }
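
/* As a simplified illustration (with assumed round numbers): if the original
   node had an IPA count of 1000 and callers with a cumulative count of 600
   were redirected to the new clone, the clone ends up with a count of 600,
   the original is left with 400, and the counts of their outgoing edges are
   scaled by factors of 600/1000 and 400/1000 respectively.  */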
4400
4401 /* Update the respective profile of specialized NEW_NODE and the original
4402 ORIG_NODE after additional edges with cumulative count sum REDIRECTED_SUM
4403 have been redirected to the specialized version. */
4404
4405 static void
4406 update_specialized_profile (struct cgraph_node *new_node,
4407 struct cgraph_node *orig_node,
4408 profile_count redirected_sum)
4409 {
4410 struct cgraph_edge *cs;
4411 profile_count new_node_count, orig_node_count = orig_node->count;
4412
4413 if (dump_file)
4414 {
4415 fprintf (dump_file, " the sum of counts of redirected edges is ");
4416 redirected_sum.dump (dump_file);
4417 fprintf (dump_file, "\n");
4418 }
4419 if (!(orig_node_count > profile_count::zero ()))
4420 return;
4421
4422 gcc_assert (orig_node_count >= redirected_sum);
4423
4424 new_node_count = new_node->count;
4425 new_node->count += redirected_sum;
4426 orig_node->count -= redirected_sum;
4427
4428 for (cs = new_node->callees; cs; cs = cs->next_callee)
4429 cs->count += cs->count.apply_scale (redirected_sum, new_node_count);
4430
4431 for (cs = orig_node->callees; cs; cs = cs->next_callee)
4432 {
4433 profile_count dec = cs->count.apply_scale (redirected_sum,
4434 orig_node_count);
4435 cs->count -= dec;
4436 }
4437
4438 if (dump_file)
4439 dump_profile_updates (orig_node, new_node);
4440 }
4441
4442 /* Return true if we would like to remove a parameter from NODE when cloning it
4443 with KNOWN_CSTS scalar constants. */
4444
4445 static bool
4446 want_remove_some_param_p (cgraph_node *node, vec<tree> known_csts)
4447 {
4448 auto_vec<bool, 16> surviving;
4449 bool filled_vec = false;
4450 ipa_node_params *info = IPA_NODE_REF (node);
4451 int i, count = ipa_get_param_count (info);
4452
4453 for (i = 0; i < count; i++)
4454 {
4455 if (!known_csts[i] && ipa_is_param_used (info, i))
4456 continue;
4457
4458 if (!filled_vec)
4459 {
4460 if (!node->clone.param_adjustments)
4461 return true;
4462 node->clone.param_adjustments->get_surviving_params (&surviving);
4463 filled_vec = true;
4464 }
      if ((unsigned) i < surviving.length () && surviving[i])
4466 return true;
4467 }
4468 return false;
4469 }
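
/* For instance, if KNOWN_CSTS provides a constant for parameter 0 and that
   parameter still survives in NODE, cloning could drop it, so the function
   returns true.  */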
4470
4471 /* Create a specialized version of NODE with known constants in KNOWN_CSTS,
4472 known contexts in KNOWN_CONTEXTS and known aggregate values in AGGVALS and
4473 redirect all edges in CALLERS to it. */
4474
4475 static struct cgraph_node *
4476 create_specialized_node (struct cgraph_node *node,
4477 vec<tree> known_csts,
4478 vec<ipa_polymorphic_call_context> known_contexts,
4479 struct ipa_agg_replacement_value *aggvals,
4480 vec<cgraph_edge *> callers)
4481 {
4482 class ipa_node_params *new_info, *info = IPA_NODE_REF (node);
4483 vec<ipa_replace_map *, va_gc> *replace_trees = NULL;
4484 vec<ipa_adjusted_param, va_gc> *new_params = NULL;
4485 struct ipa_agg_replacement_value *av;
4486 struct cgraph_node *new_node;
4487 int i, count = ipa_get_param_count (info);
4488 ipa_param_adjustments *old_adjustments = node->clone.param_adjustments;
4489 ipa_param_adjustments *new_adjustments;
4490 gcc_assert (!info->ipcp_orig_node);
4491 gcc_assert (node->can_change_signature
4492 || !old_adjustments);
4493
4494 if (old_adjustments)
4495 {
      /* At the moment all IPA optimizations should use the number of
	 parameters of the prevailing decl as the m_always_copy_start.
	 Handling any other value would complicate the code below, so for the
	 time being let's only assert it is so.  */
4500 gcc_assert (old_adjustments->m_always_copy_start == count
4501 || old_adjustments->m_always_copy_start < 0);
4502 int old_adj_count = vec_safe_length (old_adjustments->m_adj_params);
4503 for (i = 0; i < old_adj_count; i++)
4504 {
4505 ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
4506 if (!node->can_change_signature
4507 || old_adj->op != IPA_PARAM_OP_COPY
4508 || (!known_csts[old_adj->base_index]
4509 && ipa_is_param_used (info, old_adj->base_index)))
4510 {
4511 ipa_adjusted_param new_adj = *old_adj;
4512
4513 new_adj.prev_clone_adjustment = true;
4514 new_adj.prev_clone_index = i;
4515 vec_safe_push (new_params, new_adj);
4516 }
4517 }
4518 bool skip_return = old_adjustments->m_skip_return;
4519 new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
4520 ipa_param_adjustments (new_params, count,
4521 skip_return));
4522 }
4523 else if (node->can_change_signature
4524 && want_remove_some_param_p (node, known_csts))
4525 {
4526 ipa_adjusted_param adj;
4527 memset (&adj, 0, sizeof (adj));
4528 adj.op = IPA_PARAM_OP_COPY;
4529 for (i = 0; i < count; i++)
4530 if (!known_csts[i] && ipa_is_param_used (info, i))
4531 {
4532 adj.base_index = i;
4533 adj.prev_clone_index = i;
4534 vec_safe_push (new_params, adj);
4535 }
4536 new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
4537 ipa_param_adjustments (new_params, count, false));
4538 }
4539 else
4540 new_adjustments = NULL;
4541
4542 replace_trees = vec_safe_copy (node->clone.tree_map);
4543 for (i = 0; i < count; i++)
4544 {
4545 tree t = known_csts[i];
4546 if (t)
4547 {
4548 struct ipa_replace_map *replace_map;
4549
4550 gcc_checking_assert (TREE_CODE (t) != TREE_BINFO);
4551 replace_map = get_replacement_map (info, t, i);
4552 if (replace_map)
4553 vec_safe_push (replace_trees, replace_map);
4554 }
4555 }
4556 auto_vec<cgraph_edge *, 2> self_recursive_calls;
4557 for (i = callers.length () - 1; i >= 0; i--)
4558 {
4559 cgraph_edge *cs = callers[i];
4560 if (cs->caller == node)
4561 {
4562 self_recursive_calls.safe_push (cs);
4563 callers.unordered_remove (i);
4564 }
4565 }
4566
4567 unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
4568 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
4569 node->decl)));
4570 new_node = node->create_virtual_clone (callers, replace_trees,
4571 new_adjustments, "constprop",
4572 suffix_counter);
4573 suffix_counter++;
4574
4575 bool have_self_recursive_calls = !self_recursive_calls.is_empty ();
4576 for (unsigned j = 0; j < self_recursive_calls.length (); j++)
4577 {
4578 cgraph_edge *cs = get_next_cgraph_edge_clone (self_recursive_calls[j]);
      /* Cloned edges can disappear during cloning as speculation can be
	 resolved; check that we have one and that it comes from the last
	 cloning.  */
4582 if (cs && cs->caller == new_node)
4583 cs->redirect_callee_duplicating_thunks (new_node);
      /* Any future code that would make more than one clone of an outgoing
	 edge would confuse this mechanism, so let's check that this does not
	 happen.  */
4587 gcc_checking_assert (!cs
4588 || !get_next_cgraph_edge_clone (cs)
4589 || get_next_cgraph_edge_clone (cs)->caller != new_node);
4590 }
4591 if (have_self_recursive_calls)
4592 new_node->expand_all_artificial_thunks ();
4593
4594 ipa_set_node_agg_value_chain (new_node, aggvals);
4595 for (av = aggvals; av; av = av->next)
4596 new_node->maybe_create_reference (av->value, NULL);
4597
4598 if (dump_file && (dump_flags & TDF_DETAILS))
4599 {
4600 fprintf (dump_file, " the new node is %s.\n", new_node->dump_name ());
4601 if (known_contexts.exists ())
4602 {
4603 for (i = 0; i < count; i++)
4604 if (!known_contexts[i].useless_p ())
4605 {
4606 fprintf (dump_file, " known ctx %i is ", i);
4607 known_contexts[i].dump (dump_file);
4608 }
4609 }
4610 if (aggvals)
4611 ipa_dump_agg_replacement_values (dump_file, aggvals);
4612 }
4613 ipa_check_create_node_params ();
4614 update_profiling_info (node, new_node);
4615 new_info = IPA_NODE_REF (new_node);
4616 new_info->ipcp_orig_node = node;
4617 new_node->ipcp_clone = true;
4618 new_info->known_csts = known_csts;
4619 new_info->known_contexts = known_contexts;
4620
4621 ipcp_discover_new_direct_edges (new_node, known_csts, known_contexts, aggvals);
4622
4623 callers.release ();
4624 return new_node;
4625 }
4626
/* Return true if JFUNC, which describes the i-th parameter of call CS, is a
   pass-through function to itself when the cgraph_node involved is not an
   IPA-CP clone.  When SIMPLE is true, further check if JFUNC is a simple
   no-operation pass-through.  */
4631
4632 static bool
4633 self_recursive_pass_through_p (cgraph_edge *cs, ipa_jump_func *jfunc, int i,
4634 bool simple = true)
4635 {
4636 enum availability availability;
4637 if (cs->caller == cs->callee->function_symbol (&availability)
4638 && availability > AVAIL_INTERPOSABLE
4639 && jfunc->type == IPA_JF_PASS_THROUGH
4640 && (!simple || ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
4641 && ipa_get_jf_pass_through_formal_id (jfunc) == i
4642 && IPA_NODE_REF (cs->caller)
4643 && !IPA_NODE_REF (cs->caller)->ipcp_orig_node)
4644 return true;
4645 return false;
4646 }
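
/* For example, a directly recursive call passing its own parameter through
   unchanged, as in

     fn (int i)
     {
       ...
       fn (i);
     }

   satisfies the simple variant of this predicate for I equal to 0.  */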
4647
/* Return true if JFUNC, which describes a part of an aggregate represented or
   pointed to by the i-th parameter of call CS, is a pass-through function to
   itself when the cgraph_node involved is not an IPA-CP clone.  When SIMPLE
   is true, further check if JFUNC is a simple no-operation pass-through.  */
4653
4654 static bool
4655 self_recursive_agg_pass_through_p (cgraph_edge *cs, ipa_agg_jf_item *jfunc,
4656 int i, bool simple = true)
4657 {
4658 enum availability availability;
4659 if (cs->caller == cs->callee->function_symbol (&availability)
4660 && availability > AVAIL_INTERPOSABLE
4661 && jfunc->jftype == IPA_JF_LOAD_AGG
4662 && jfunc->offset == jfunc->value.load_agg.offset
4663 && (!simple || jfunc->value.pass_through.operation == NOP_EXPR)
4664 && jfunc->value.pass_through.formal_id == i
4665 && useless_type_conversion_p (jfunc->value.load_agg.type, jfunc->type)
4666 && IPA_NODE_REF (cs->caller)
4667 && !IPA_NODE_REF (cs->caller)->ipcp_orig_node)
4668 return true;
4669 return false;
4670 }
4671
/* Given a NODE and a subset of its CALLERS, try to populate blank slots in
   KNOWN_CSTS with constants that are also known for all of the CALLERS.  */
4674
4675 static void
4676 find_more_scalar_values_for_callers_subset (struct cgraph_node *node,
4677 vec<tree> known_csts,
4678 vec<cgraph_edge *> callers)
4679 {
4680 class ipa_node_params *info = IPA_NODE_REF (node);
4681 int i, count = ipa_get_param_count (info);
4682
4683 for (i = 0; i < count; i++)
4684 {
4685 struct cgraph_edge *cs;
4686 tree newval = NULL_TREE;
4687 int j;
4688 bool first = true;
4689 tree type = ipa_get_type (info, i);
4690
4691 if (ipa_get_scalar_lat (info, i)->bottom || known_csts[i])
4692 continue;
4693
4694 FOR_EACH_VEC_ELT (callers, j, cs)
4695 {
4696 struct ipa_jump_func *jump_func;
4697 tree t;
4698
4699 if (!IPA_EDGE_REF (cs)
4700 || i >= ipa_get_cs_argument_count (IPA_EDGE_REF (cs))
4701 || (i == 0
4702 && call_passes_through_thunk_p (cs)))
4703 {
4704 newval = NULL_TREE;
4705 break;
4706 }
4707 jump_func = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), i);
4708
4709 /* Besides simple pass-through jump function, arithmetic jump
4710 function could also introduce argument-direct-pass-through for
4711 self-feeding recursive call. For example,
4712
4713 fn (int i)
4714 {
4715 fn (i & 1);
4716 }
4717
4718 Given that i is 0, recursive propagation via (i & 1) also gets
4719 0. */
4720 if (self_recursive_pass_through_p (cs, jump_func, i, false))
4721 {
4722 gcc_assert (newval);
4723 t = ipa_get_jf_arith_result (
4724 ipa_get_jf_pass_through_operation (jump_func),
4725 newval,
4726 ipa_get_jf_pass_through_operand (jump_func),
4727 type);
4728 }
4729 else
4730 t = ipa_value_from_jfunc (IPA_NODE_REF (cs->caller), jump_func,
4731 type);
4732 if (!t
4733 || (newval
4734 && !values_equal_for_ipcp_p (t, newval))
4735 || (!first && !newval))
4736 {
4737 newval = NULL_TREE;
4738 break;
4739 }
4740 else
4741 newval = t;
4742 first = false;
4743 }
4744
4745 if (newval)
4746 {
4747 if (dump_file && (dump_flags & TDF_DETAILS))
4748 {
4749 fprintf (dump_file, " adding an extra known scalar value ");
4750 print_ipcp_constant_value (dump_file, newval);
4751 fprintf (dump_file, " for ");
4752 ipa_dump_param (dump_file, info, i);
4753 fprintf (dump_file, "\n");
4754 }
4755
4756 known_csts[i] = newval;
4757 }
4758 }
4759 }
4760
/* Given a NODE and a subset of its CALLERS, try to populate blank slots in
   KNOWN_CONTEXTS with polymorphic contexts that are also known for all of
   the CALLERS.  */
4764
4765 static void
4766 find_more_contexts_for_caller_subset (cgraph_node *node,
4767 vec<ipa_polymorphic_call_context>
4768 *known_contexts,
4769 vec<cgraph_edge *> callers)
4770 {
4771 ipa_node_params *info = IPA_NODE_REF (node);
4772 int i, count = ipa_get_param_count (info);
4773
4774 for (i = 0; i < count; i++)
4775 {
4776 cgraph_edge *cs;
4777
4778 if (ipa_get_poly_ctx_lat (info, i)->bottom
4779 || (known_contexts->exists ()
4780 && !(*known_contexts)[i].useless_p ()))
4781 continue;
4782
4783 ipa_polymorphic_call_context newval;
4784 bool first = true;
4785 int j;
4786
4787 FOR_EACH_VEC_ELT (callers, j, cs)
4788 {
4789 if (!IPA_EDGE_REF (cs)
4790 || i >= ipa_get_cs_argument_count (IPA_EDGE_REF (cs)))
4791 return;
4792 ipa_jump_func *jfunc = ipa_get_ith_jump_func (IPA_EDGE_REF (cs),
4793 i);
4794 ipa_polymorphic_call_context ctx;
4795 ctx = ipa_context_from_jfunc (IPA_NODE_REF (cs->caller), cs, i,
4796 jfunc);
4797 if (first)
4798 {
4799 newval = ctx;
4800 first = false;
4801 }
4802 else
4803 newval.meet_with (ctx);
4804 if (newval.useless_p ())
4805 break;
4806 }
4807
4808 if (!newval.useless_p ())
4809 {
4810 if (dump_file && (dump_flags & TDF_DETAILS))
4811 {
4812 fprintf (dump_file, " adding an extra known polymorphic "
4813 "context ");
4814 print_ipcp_constant_value (dump_file, newval);
4815 fprintf (dump_file, " for ");
4816 ipa_dump_param (dump_file, info, i);
4817 fprintf (dump_file, "\n");
4818 }
4819
4820 if (!known_contexts->exists ())
4821 known_contexts->safe_grow_cleared (ipa_get_param_count (info));
4822 (*known_contexts)[i] = newval;
4823 }
4824
4825 }
4826 }
4827
4828 /* Go through PLATS and create a vector of values consisting of values and
4829 offsets (minus OFFSET) of lattices that contain only a single value. */
4830
4831 static vec<ipa_agg_value>
4832 copy_plats_to_inter (class ipcp_param_lattices *plats, HOST_WIDE_INT offset)
4833 {
4834 vec<ipa_agg_value> res = vNULL;
4835
4836 if (!plats->aggs || plats->aggs_contain_variable || plats->aggs_bottom)
4837 return vNULL;
4838
4839 for (struct ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
4840 if (aglat->is_single_const ())
4841 {
4842 struct ipa_agg_value ti;
4843 ti.offset = aglat->offset - offset;
4844 ti.value = aglat->values->value;
4845 res.safe_push (ti);
4846 }
4847 return res;
4848 }
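
/* For example, an aggregate lattice holding a single known value 7 at
   offset 64 is, when OFFSET is 32, copied into the result as value 7 at
   offset 32.  */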
4849
4850 /* Intersect all values in INTER with single value lattices in PLATS (while
4851 subtracting OFFSET). */
4852
4853 static void
4854 intersect_with_plats (class ipcp_param_lattices *plats,
4855 vec<ipa_agg_value> *inter,
4856 HOST_WIDE_INT offset)
4857 {
4858 struct ipcp_agg_lattice *aglat;
4859 struct ipa_agg_value *item;
4860 int k;
4861
4862 if (!plats->aggs || plats->aggs_contain_variable || plats->aggs_bottom)
4863 {
4864 inter->release ();
4865 return;
4866 }
4867
4868 aglat = plats->aggs;
4869 FOR_EACH_VEC_ELT (*inter, k, item)
4870 {
4871 bool found = false;
4872 if (!item->value)
4873 continue;
4874 while (aglat)
4875 {
4876 if (aglat->offset - offset > item->offset)
4877 break;
4878 if (aglat->offset - offset == item->offset)
4879 {
4880 if (aglat->is_single_const ())
4881 {
4882 tree value = aglat->values->value;
4883
4884 if (values_equal_for_ipcp_p (item->value, value))
4885 found = true;
4886 }
4887 break;
4888 }
4889 aglat = aglat->next;
4890 }
4891 if (!found)
4892 item->value = NULL_TREE;
4893 }
4894 }
4895
/* Copy aggregate replacement values of NODE (which is an IPA-CP clone) for
   the parameter with the given INDEX to the vector result while subtracting
   OFFSET from the individual value offsets.  */
4898
4899 static vec<ipa_agg_value>
4900 agg_replacements_to_vector (struct cgraph_node *node, int index,
4901 HOST_WIDE_INT offset)
4902 {
4903 struct ipa_agg_replacement_value *av;
4904 vec<ipa_agg_value> res = vNULL;
4905
4906 for (av = ipa_get_agg_replacements_for_node (node); av; av = av->next)
4907 if (av->index == index
4908 && (av->offset - offset) >= 0)
4909 {
4910 struct ipa_agg_value item;
4911 gcc_checking_assert (av->value);
4912 item.offset = av->offset - offset;
4913 item.value = av->value;
4914 res.safe_push (item);
4915 }
4916
4917 return res;
4918 }
4919
4920 /* Intersect all values in INTER with those that we have already scheduled to
4921 be replaced in parameter number INDEX of NODE, which is an IPA-CP clone
4922 (while subtracting OFFSET). */
4923
4924 static void
4925 intersect_with_agg_replacements (struct cgraph_node *node, int index,
4926 vec<ipa_agg_value> *inter,
4927 HOST_WIDE_INT offset)
4928 {
4929 struct ipa_agg_replacement_value *srcvals;
4930 struct ipa_agg_value *item;
4931 int i;
4932
4933 srcvals = ipa_get_agg_replacements_for_node (node);
4934 if (!srcvals)
4935 {
4936 inter->release ();
4937 return;
4938 }
4939
4940 FOR_EACH_VEC_ELT (*inter, i, item)
4941 {
4942 struct ipa_agg_replacement_value *av;
4943 bool found = false;
4944 if (!item->value)
4945 continue;
4946 for (av = srcvals; av; av = av->next)
4947 {
4948 gcc_checking_assert (av->value);
4949 if (av->index == index
4950 && av->offset - offset == item->offset)
4951 {
4952 if (values_equal_for_ipcp_p (item->value, av->value))
4953 found = true;
4954 break;
4955 }
4956 }
4957 if (!found)
4958 item->value = NULL_TREE;
4959 }
4960 }
4961
4962 /* Intersect values in INTER with aggregate values that come along edge CS to
4963 parameter number INDEX and return it. If INTER does not actually exist yet,
4964 copy all incoming values to it. If we determine we ended up with no values
4965 whatsoever, return a released vector. */
4966
4967 static vec<ipa_agg_value>
4968 intersect_aggregates_with_edge (struct cgraph_edge *cs, int index,
4969 vec<ipa_agg_value> inter)
4970 {
4971 struct ipa_jump_func *jfunc;
4972 jfunc = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), index);
4973 if (jfunc->type == IPA_JF_PASS_THROUGH
4974 && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
4975 {
4976 class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
4977 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
4978
4979 if (caller_info->ipcp_orig_node)
4980 {
4981 struct cgraph_node *orig_node = caller_info->ipcp_orig_node;
4982 class ipcp_param_lattices *orig_plats;
4983 orig_plats = ipa_get_parm_lattices (IPA_NODE_REF (orig_node),
4984 src_idx);
4985 if (agg_pass_through_permissible_p (orig_plats, jfunc))
4986 {
4987 if (!inter.exists ())
4988 inter = agg_replacements_to_vector (cs->caller, src_idx, 0);
4989 else
4990 intersect_with_agg_replacements (cs->caller, src_idx,
4991 &inter, 0);
4992 }
4993 else
4994 {
4995 inter.release ();
4996 return vNULL;
4997 }
4998 }
4999 else
5000 {
5001 class ipcp_param_lattices *src_plats;
5002 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
5003 if (agg_pass_through_permissible_p (src_plats, jfunc))
5004 {
5005 /* Currently we do not produce clobber aggregate jump
5006 functions, adjust when we do. */
5007 gcc_checking_assert (!jfunc->agg.items);
5008 if (!inter.exists ())
5009 inter = copy_plats_to_inter (src_plats, 0);
5010 else
5011 intersect_with_plats (src_plats, &inter, 0);
5012 }
5013 else
5014 {
5015 inter.release ();
5016 return vNULL;
5017 }
5018 }
5019 }
5020 else if (jfunc->type == IPA_JF_ANCESTOR
5021 && ipa_get_jf_ancestor_agg_preserved (jfunc))
5022 {
5023 class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
5024 int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
5025 class ipcp_param_lattices *src_plats;
5026 HOST_WIDE_INT delta = ipa_get_jf_ancestor_offset (jfunc);
5027
5028 if (caller_info->ipcp_orig_node)
5029 {
5030 if (!inter.exists ())
5031 inter = agg_replacements_to_vector (cs->caller, src_idx, delta);
5032 else
5033 intersect_with_agg_replacements (cs->caller, src_idx, &inter,
5034 delta);
5035 }
5036 else
5037 {
5038 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
5039 /* Currently we do not produce clobber aggregate jump
5040 functions, adjust when we do. */
5041 gcc_checking_assert (!src_plats->aggs || !jfunc->agg.items);
5042 if (!inter.exists ())
5043 inter = copy_plats_to_inter (src_plats, delta);
5044 else
5045 intersect_with_plats (src_plats, &inter, delta);
5046 }
5047 }
5048 else if (jfunc->agg.items)
5049 {
5050 class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
5051 struct ipa_agg_value *item;
5052 int k;
5053
5054 if (!inter.exists ())
5055 for (unsigned i = 0; i < jfunc->agg.items->length (); i++)
5056 {
5057 struct ipa_agg_jf_item *agg_item = &(*jfunc->agg.items)[i];
5058 tree value = ipa_agg_value_from_node (caller_info, cs->caller,
5059 agg_item);
5060 if (value)
5061 {
5062 struct ipa_agg_value agg_value;
5063
5064 agg_value.value = value;
5065 agg_value.offset = agg_item->offset;
5066 inter.safe_push (agg_value);
5067 }
5068 }
5069 else
5070 FOR_EACH_VEC_ELT (inter, k, item)
5071 {
5072 int l = 0;
5073 bool found = false;
5074
5075 if (!item->value)
5076 continue;
5077
5078 while ((unsigned) l < jfunc->agg.items->length ())
5079 {
5080 struct ipa_agg_jf_item *ti;
5081 ti = &(*jfunc->agg.items)[l];
5082 if (ti->offset > item->offset)
5083 break;
5084 if (ti->offset == item->offset)
5085 {
5086 tree value;
5087
5088 /* Besides simple pass-through aggregate jump function,
5089 arithmetic aggregate jump function could also bring
5090 same aggregate value as parameter passed-in for
5091 self-feeding recursive call. For example,
5092
5093 fn (int *i)
5094 {
5095 int j = *i & 1;
5096 fn (&j);
5097 }
5098
5099 Given that *i is 0, recursive propagation via (*i & 1)
5100 also gets 0. */
5101 if (self_recursive_agg_pass_through_p (cs, ti, index,
5102 false))
5103 value = ipa_get_jf_arith_result (
5104 ti->value.pass_through.operation,
5105 item->value,
5106 ti->value.pass_through.operand,
5107 ti->type);
5108 else
5109 value = ipa_agg_value_from_node (caller_info,
5110 cs->caller, ti);
5111
5112 if (value && values_equal_for_ipcp_p (item->value, value))
5113 found = true;
5114 break;
5115 }
5116 l++;
5117 }
5118 if (!found)
5119 item->value = NULL;
5120 }
5121 }
5122 else
5123 {
5124 inter.release ();
5125 return vNULL;
5126 }
5127 return inter;
5128 }
5129
5130 /* Look at edges in CALLERS and collect all known aggregate values that arrive
5131 from all of them. */
5132
5133 static struct ipa_agg_replacement_value *
5134 find_aggregate_values_for_callers_subset (struct cgraph_node *node,
5135 vec<cgraph_edge *> callers)
5136 {
5137 class ipa_node_params *dest_info = IPA_NODE_REF (node);
5138 struct ipa_agg_replacement_value *res;
5139 struct ipa_agg_replacement_value **tail = &res;
5140 struct cgraph_edge *cs;
5141 int i, j, count = ipa_get_param_count (dest_info);
5142
5143 FOR_EACH_VEC_ELT (callers, j, cs)
5144 {
5145 if (!IPA_EDGE_REF (cs))
5146 {
5147 count = 0;
5148 break;
5149 }
5150 int c = ipa_get_cs_argument_count (IPA_EDGE_REF (cs));
5151 if (c < count)
5152 count = c;
5153 }
5154
5155 for (i = 0; i < count; i++)
5156 {
5157 struct cgraph_edge *cs;
5158 vec<ipa_agg_value> inter = vNULL;
5159 struct ipa_agg_value *item;
5160 class ipcp_param_lattices *plats = ipa_get_parm_lattices (dest_info, i);
5161 int j;
5162
5163 /* Among other things, the following check should deal with all by_ref
5164 mismatches. */
5165 if (plats->aggs_bottom)
5166 continue;
5167
5168 FOR_EACH_VEC_ELT (callers, j, cs)
5169 {
5170 struct ipa_jump_func *jfunc
5171 = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), i);
5172 if (self_recursive_pass_through_p (cs, jfunc, i)
5173 && (!plats->aggs_by_ref
5174 || ipa_get_jf_pass_through_agg_preserved (jfunc)))
5175 continue;
5176 inter = intersect_aggregates_with_edge (cs, i, inter);
5177
5178 if (!inter.exists ())
5179 goto next_param;
5180 }
5181
5182 FOR_EACH_VEC_ELT (inter, j, item)
5183 {
5184 struct ipa_agg_replacement_value *v;
5185
5186 if (!item->value)
5187 continue;
5188
5189 v = ggc_alloc<ipa_agg_replacement_value> ();
5190 v->index = i;
5191 v->offset = item->offset;
5192 v->value = item->value;
5193 v->by_ref = plats->aggs_by_ref;
5194 *tail = v;
5195 tail = &v->next;
5196 }
5197
5198 next_param:
5199 if (inter.exists ())
5200 inter.release ();
5201 }
5202 *tail = NULL;
5203 return res;
5204 }
5205
5206 /* Determine whether CS also brings all scalar values that the NODE is
5207 specialized for. */
5208
5209 static bool
5210 cgraph_edge_brings_all_scalars_for_node (struct cgraph_edge *cs,
5211 struct cgraph_node *node)
5212 {
5213 class ipa_node_params *dest_info = IPA_NODE_REF (node);
5214 int count = ipa_get_param_count (dest_info);
5215 class ipa_node_params *caller_info;
5216 class ipa_edge_args *args;
5217 int i;
5218
5219 caller_info = IPA_NODE_REF (cs->caller);
5220 args = IPA_EDGE_REF (cs);
5221 for (i = 0; i < count; i++)
5222 {
5223 struct ipa_jump_func *jump_func;
5224 tree val, t;
5225
5226 val = dest_info->known_csts[i];
5227 if (!val)
5228 continue;
5229
5230 if (i >= ipa_get_cs_argument_count (args))
5231 return false;
5232 jump_func = ipa_get_ith_jump_func (args, i);
5233 t = ipa_value_from_jfunc (caller_info, jump_func,
5234 ipa_get_type (dest_info, i));
5235 if (!t || !values_equal_for_ipcp_p (val, t))
5236 return false;
5237 }
5238 return true;
5239 }
5240
5241 /* Determine whether CS also brings all aggregate values that NODE is
5242 specialized for. */
5243 static bool
5244 cgraph_edge_brings_all_agg_vals_for_node (struct cgraph_edge *cs,
5245 struct cgraph_node *node)
5246 {
5247 class ipa_node_params *orig_node_info;
5248 struct ipa_agg_replacement_value *aggval;
5249 int i, ec, count;
5250
5251 aggval = ipa_get_agg_replacements_for_node (node);
5252 if (!aggval)
5253 return true;
5254
5255 count = ipa_get_param_count (IPA_NODE_REF (node));
5256 ec = ipa_get_cs_argument_count (IPA_EDGE_REF (cs));
5257 if (ec < count)
5258 for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next)
      if (av->index >= ec)
5260 return false;
5261
5262 orig_node_info = IPA_NODE_REF (IPA_NODE_REF (node)->ipcp_orig_node);
5263
5264 for (i = 0; i < count; i++)
5265 {
5266 class ipcp_param_lattices *plats;
5267 bool interesting = false;
5268 for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next)
	if (av->index == i)
5270 {
5271 interesting = true;
5272 break;
5273 }
5274 if (!interesting)
5275 continue;
5276
      plats = ipa_get_parm_lattices (orig_node_info, i);
5278 if (plats->aggs_bottom)
5279 return false;
5280
5281 vec<ipa_agg_value> values = intersect_aggregates_with_edge (cs, i, vNULL);
5282 if (!values.exists ())
5283 return false;
5284
5285 for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next)
	if (av->index == i)
5287 {
5288 struct ipa_agg_value *item;
5289 int j;
5290 bool found = false;
5291 FOR_EACH_VEC_ELT (values, j, item)
5292 if (item->value
5293 && item->offset == av->offset
5294 && values_equal_for_ipcp_p (item->value, av->value))
5295 {
5296 found = true;
5297 break;
5298 }
5299 if (!found)
5300 {
5301 values.release ();
5302 return false;
5303 }
5304 }
5305 values.release ();
5306 }
5307 return true;
5308 }
5309
/* Given an original NODE and a VAL for which we have already created a
   specialized clone, look whether there are incoming edges that still lead
   into the old node but now also bring the requested value and also conform
   to all other criteria such that they can be redirected to the specialized
   node.  This function can therefore redirect the final edge in an SCC.  */
5315
5316 template <typename valtype>
5317 static void
5318 perhaps_add_new_callers (cgraph_node *node, ipcp_value<valtype> *val)
5319 {
5320 ipcp_value_source<valtype> *src;
5321 profile_count redirected_sum = profile_count::zero ();
5322
5323 for (src = val->sources; src; src = src->next)
5324 {
5325 struct cgraph_edge *cs = src->cs;
5326 while (cs)
5327 {
5328 if (cgraph_edge_brings_value_p (cs, src, node, val)
5329 && cgraph_edge_brings_all_scalars_for_node (cs, val->spec_node)
5330 && cgraph_edge_brings_all_agg_vals_for_node (cs, val->spec_node))
5331 {
5332 if (dump_file)
5333 fprintf (dump_file, " - adding an extra caller %s of %s\n",
5334 cs->caller->dump_name (),
5335 val->spec_node->dump_name ());
5336
5337 cs->redirect_callee_duplicating_thunks (val->spec_node);
5338 val->spec_node->expand_all_artificial_thunks ();
5339 if (cs->count.ipa ().initialized_p ())
5340 redirected_sum = redirected_sum + cs->count.ipa ();
5341 }
5342 cs = get_next_cgraph_edge_clone (cs);
5343 }
5344 }
5345
5346 if (redirected_sum.nonzero_p ())
5347 update_specialized_profile (val->spec_node, node, redirected_sum);
5348 }
5349
5350 /* Return true if KNOWN_CONTEXTS contain at least one useful context. */
5351
5352 static bool
5353 known_contexts_useful_p (vec<ipa_polymorphic_call_context> known_contexts)
5354 {
5355 ipa_polymorphic_call_context *ctx;
5356 int i;
5357
5358 FOR_EACH_VEC_ELT (known_contexts, i, ctx)
5359 if (!ctx->useless_p ())
5360 return true;
5361 return false;
5362 }
5363
/* Return a copy of KNOWN_CONTEXTS if it contains at least one useful context,
   otherwise return vNULL.  */
5365
5366 static vec<ipa_polymorphic_call_context>
5367 copy_useful_known_contexts (vec<ipa_polymorphic_call_context> known_contexts)
5368 {
5369 if (known_contexts_useful_p (known_contexts))
5370 return known_contexts.copy ();
5371 else
5372 return vNULL;
5373 }
5374
5375 /* Copy KNOWN_CSTS and modify the copy according to VAL and INDEX. If
5376 non-empty, replace KNOWN_CONTEXTS with its copy too. */
5377
5378 static void
5379 modify_known_vectors_with_val (vec<tree> *known_csts,
5380 vec<ipa_polymorphic_call_context> *known_contexts,
5381 ipcp_value<tree> *val,
5382 int index)
5383 {
5384 *known_csts = known_csts->copy ();
5385 *known_contexts = copy_useful_known_contexts (*known_contexts);
5386 (*known_csts)[index] = val->value;
5387 }
5388
5389 /* Replace KNOWN_CSTS with its copy. Also copy KNOWN_CONTEXTS and modify the
5390 copy according to VAL and INDEX. */
5391
5392 static void
5393 modify_known_vectors_with_val (vec<tree> *known_csts,
5394 vec<ipa_polymorphic_call_context> *known_contexts,
5395 ipcp_value<ipa_polymorphic_call_context> *val,
5396 int index)
5397 {
5398 *known_csts = known_csts->copy ();
5399 *known_contexts = known_contexts->copy ();
5400 (*known_contexts)[index] = val->value;
5401 }
5402
/* Return true if OFFSET is minus one, indicating VALUE was not an aggregate
   value, or if the AGGVALS list contains a replacement equivalent to VALUE,
   INDEX and OFFSET.  */
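/* For instance (a hypothetical example; aggregate offsets are in bits): if
   every caller does

     struct S s = { 1, 16 };
     foo (&s);

   then, assuming 32-bit int, the replacement list for foo's first parameter
   may contain an entry with index 0, offset 0, value 1 and another with
   index 0, offset 32, value 16.  A scalar constant passed directly is
   identified by an OFFSET of minus one instead.  */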
5406
5407 DEBUG_FUNCTION bool
5408 ipcp_val_agg_replacement_ok_p (ipa_agg_replacement_value *aggvals,
5409 int index, HOST_WIDE_INT offset, tree value)
5410 {
5411 if (offset == -1)
5412 return true;
5413
5414 while (aggvals)
5415 {
5416 if (aggvals->index == index
5417 && aggvals->offset == offset
5418 && values_equal_for_ipcp_p (aggvals->value, value))
5419 return true;
5420 aggvals = aggvals->next;
5421 }
5422 return false;
5423 }
5424
/* Return true if OFFSET is minus one; the source of a polymorphic context
   can never be an aggregate value, so there is nothing else to check.  */
5427
5428 DEBUG_FUNCTION bool
5429 ipcp_val_agg_replacement_ok_p (ipa_agg_replacement_value *,
5430 int , HOST_WIDE_INT offset,
5431 ipa_polymorphic_call_context)
5432 {
5433 return offset == -1;
5434 }
5435
5436 /* Decide whether to create a special version of NODE for value VAL of parameter
5437 at the given INDEX. If OFFSET is -1, the value is for the parameter itself,
5438 otherwise it is stored at the given OFFSET of the parameter. KNOWN_CSTS,
5439 KNOWN_CONTEXTS and KNOWN_AGGS describe the other already known values. */
5440
5441 template <typename valtype>
5442 static bool
5443 decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset,
5444 ipcp_value<valtype> *val, vec<tree> known_csts,
5445 vec<ipa_polymorphic_call_context> known_contexts)
5446 {
5447 struct ipa_agg_replacement_value *aggvals;
5448 int freq_sum, caller_count;
5449 profile_count count_sum;
5450 vec<cgraph_edge *> callers;
5451
5452 if (val->spec_node)
5453 {
5454 perhaps_add_new_callers (node, val);
5455 return false;
5456 }
5457 else if (val->local_size_cost + overall_size > get_max_overall_size (node))
5458 {
5459 if (dump_file && (dump_flags & TDF_DETAILS))
5460 fprintf (dump_file, " Ignoring candidate value because "
5461 "maximum unit size would be reached with %li.\n",
5462 val->local_size_cost + overall_size);
5463 return false;
5464 }
5465 else if (!get_info_about_necessary_edges (val, node, &freq_sum, &count_sum,
5466 &caller_count))
5467 return false;
5468
5469 if (dump_file && (dump_flags & TDF_DETAILS))
5470 {
5471 fprintf (dump_file, " - considering value ");
5472 print_ipcp_constant_value (dump_file, val->value);
5473 fprintf (dump_file, " for ");
5474 ipa_dump_param (dump_file, IPA_NODE_REF (node), index);
5475 if (offset != -1)
5476 fprintf (dump_file, ", offset: " HOST_WIDE_INT_PRINT_DEC, offset);
5477 fprintf (dump_file, " (caller_count: %i)\n", caller_count);
5478 }
5479
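  /* Consider the value worthwhile if cloning looks profitable either with
     only the time benefit and size cost local to NODE, or with the estimates
     propagated from callees added on top.  */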
5480 if (!good_cloning_opportunity_p (node, val->local_time_benefit,
5481 freq_sum, count_sum,
5482 val->local_size_cost)
5483 && !good_cloning_opportunity_p (node,
5484 val->local_time_benefit
5485 + val->prop_time_benefit,
5486 freq_sum, count_sum,
5487 val->local_size_cost
5488 + val->prop_size_cost))
5489 return false;
5490
5491 if (dump_file)
5492 fprintf (dump_file, " Creating a specialized node of %s.\n",
5493 node->dump_name ());
5494
5495 callers = gather_edges_for_value (val, node, caller_count);
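  /* For a scalar value (OFFSET of minus one), store VAL into copies of the
     known vectors; for an aggregate value only plain copies are made here
     and VAL itself is verified among the discovered AGGVALS below.  */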
5496 if (offset == -1)
5497 modify_known_vectors_with_val (&known_csts, &known_contexts, val, index);
5498 else
5499 {
5500 known_csts = known_csts.copy ();
5501 known_contexts = copy_useful_known_contexts (known_contexts);
5502 }
5503 find_more_scalar_values_for_callers_subset (node, known_csts, callers);
5504 find_more_contexts_for_caller_subset (node, &known_contexts, callers);
5505 aggvals = find_aggregate_values_for_callers_subset (node, callers);
5506 gcc_checking_assert (ipcp_val_agg_replacement_ok_p (aggvals, index,
5507 offset, val->value));
5508 val->spec_node = create_specialized_node (node, known_csts, known_contexts,
5509 aggvals, callers);
5510 overall_size += val->local_size_cost;
5511
5512 /* TODO: If for some lattice there is only one other known value
5513 left, make a special node for it too. */
5514
5515 return true;
5516 }
5517
5518 /* Decide whether and what specialized clones of NODE should be created. */
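/* For illustration (hypothetical code, not taken from any testcase): if

     static int f (int a, int b) { return a + b; }

   is called as f (x, 7) from every call site, the decisions made below can
   lead to a clone of f in which B is known to be 7 and to all calls being
   redirected to that clone.  */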
5519
5520 static bool
5521 decide_whether_version_node (struct cgraph_node *node)
5522 {
5523 class ipa_node_params *info = IPA_NODE_REF (node);
5524 int i, count = ipa_get_param_count (info);
5525 vec<tree> known_csts;
5526 vec<ipa_polymorphic_call_context> known_contexts;
5527 bool ret = false;
5528
5529 if (count == 0)
5530 return false;
5531
5532 if (dump_file && (dump_flags & TDF_DETAILS))
5533 fprintf (dump_file, "\nEvaluating opportunities for %s.\n",
5534 node->dump_name ());
5535
5536 gather_context_independent_values (info, &known_csts, &known_contexts,
5537 NULL, NULL);
5538
  for (i = 0; i < count; i++)
5540 {
5541 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
5542 ipcp_lattice<tree> *lat = &plats->itself;
5543 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
5544
5545 if (!lat->bottom
5546 && !known_csts[i])
5547 {
5548 ipcp_value<tree> *val;
5549 for (val = lat->values; val; val = val->next)
5550 ret |= decide_about_value (node, i, -1, val, known_csts,
5551 known_contexts);
5552 }
5553
5554 if (!plats->aggs_bottom)
5555 {
5556 struct ipcp_agg_lattice *aglat;
5557 ipcp_value<tree> *val;
5558 for (aglat = plats->aggs; aglat; aglat = aglat->next)
5559 if (!aglat->bottom && aglat->values
5560 /* If the following is false, the one value is in
5561 known_aggs. */
5562 && (plats->aggs_contain_variable
5563 || !aglat->is_single_const ()))
5564 for (val = aglat->values; val; val = val->next)
5565 ret |= decide_about_value (node, i, aglat->offset, val,
5566 known_csts, known_contexts);
5567 }
5568
5569 if (!ctxlat->bottom
5570 && known_contexts[i].useless_p ())
5571 {
5572 ipcp_value<ipa_polymorphic_call_context> *val;
5573 for (val = ctxlat->values; val; val = val->next)
5574 ret |= decide_about_value (node, i, -1, val, known_csts,
5575 known_contexts);
5576 }
5577
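      /* Re-fetch INFO: creating clones above may have resized the underlying
	 summary vector and invalidated the pointer.  */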
5578 info = IPA_NODE_REF (node);
5579 }
5580
5581 if (info->do_clone_for_all_contexts)
5582 {
5583 struct cgraph_node *clone;
5584 vec<cgraph_edge *> callers = node->collect_callers ();
5585
5586 for (int i = callers.length () - 1; i >= 0; i--)
5587 {
5588 cgraph_edge *cs = callers[i];
5589 class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
5590
5591 if (caller_info && caller_info->node_dead)
5592 callers.unordered_remove (i);
5593 }
5594
5595 if (!adjust_callers_for_value_intersection (callers, node))
5596 {
	  /* If the node is not called by anyone, or all its caller edges are
	     self-recursive, it is not really in use and there is no need to
	     do any cloning.  */
5600 callers.release ();
5601 known_csts.release ();
5602 known_contexts.release ();
5603 info->do_clone_for_all_contexts = false;
5604 return ret;
5605 }
5606
5607 if (dump_file)
5608 fprintf (dump_file, " - Creating a specialized node of %s "
5609 "for all known contexts.\n", node->dump_name ());
5610
5611 find_more_scalar_values_for_callers_subset (node, known_csts, callers);
5612 find_more_contexts_for_caller_subset (node, &known_contexts, callers);
5613 ipa_agg_replacement_value *aggvals
5614 = find_aggregate_values_for_callers_subset (node, callers);
5615
5616 if (!known_contexts_useful_p (known_contexts))
5617 {
5618 known_contexts.release ();
5619 known_contexts = vNULL;
5620 }
5621 clone = create_specialized_node (node, known_csts, known_contexts,
5622 aggvals, callers);
5623 info = IPA_NODE_REF (node);
5624 info->do_clone_for_all_contexts = false;
5625 IPA_NODE_REF (clone)->is_all_contexts_clone = true;
5626 ret = true;
5627 }
5628 else
5629 {
5630 known_csts.release ();
5631 known_contexts.release ();
5632 }
5633
5634 return ret;
5635 }
5636
5637 /* Transitively mark all callees of NODE within the same SCC as not dead. */
5638
5639 static void
5640 spread_undeadness (struct cgraph_node *node)
5641 {
5642 struct cgraph_edge *cs;
5643
5644 for (cs = node->callees; cs; cs = cs->next_callee)
5645 if (ipa_edge_within_scc (cs))
5646 {
5647 struct cgraph_node *callee;
5648 class ipa_node_params *info;
5649
5650 callee = cs->callee->function_symbol (NULL);
5651 info = IPA_NODE_REF (callee);
5652
5653 if (info && info->node_dead)
5654 {
5655 info->node_dead = 0;
5656 spread_undeadness (callee);
5657 }
5658 }
5659 }
5660
/* Return true if NODE has a caller from outside of its SCC that is not
   dead.  Worker callback for call_for_symbol_thunks_and_aliases.  */
5663
5664 static bool
5665 has_undead_caller_from_outside_scc_p (struct cgraph_node *node,
5666 void *data ATTRIBUTE_UNUSED)
5667 {
5668 struct cgraph_edge *cs;
5669
5670 for (cs = node->callers; cs; cs = cs->next_caller)
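    /* A caller that is a thunk does not count by itself; look through it
       at the thunk's own callers instead.  */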
5671 if (cs->caller->thunk.thunk_p
5672 && cs->caller->call_for_symbol_thunks_and_aliases
5673 (has_undead_caller_from_outside_scc_p, NULL, true))
5674 return true;
5675 else if (!ipa_edge_within_scc (cs)
5676 && (!IPA_NODE_REF (cs->caller) /* Unoptimized caller. */
5677 || !IPA_NODE_REF (cs->caller)->node_dead))
5678 return true;
5679 return false;
5680 }
5681
5682
5683 /* Identify nodes within the same SCC as NODE which are no longer needed
5684 because of new clones and will be removed as unreachable. */
5685
5686 static void
5687 identify_dead_nodes (struct cgraph_node *node)
5688 {
5689 struct cgraph_node *v;
5690 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
5691 if (v->local
5692 && IPA_NODE_REF (v)
5693 && !v->call_for_symbol_thunks_and_aliases
5694 (has_undead_caller_from_outside_scc_p, NULL, true))
5695 IPA_NODE_REF (v)->node_dead = 1;
5696
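  /* Any node still considered live keeps the SCC-internal callees it uses
     alive as well, so spread that property transitively.  */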
5697 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
5698 if (IPA_NODE_REF (v) && !IPA_NODE_REF (v)->node_dead)
5699 spread_undeadness (v);
5700
5701 if (dump_file && (dump_flags & TDF_DETAILS))
5702 {
5703 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
5704 if (IPA_NODE_REF (v) && IPA_NODE_REF (v)->node_dead)
5705 fprintf (dump_file, " Marking node as dead: %s.\n", v->dump_name ());
5706 }
5707 }
5708
5709 /* The decision stage. Iterate over the topological order of call graph nodes
5710 TOPO and make specialized clones if deemed beneficial. */
5711
5712 static void
5713 ipcp_decision_stage (class ipa_topo_info *topo)
5714 {
5715 int i;
5716
5717 if (dump_file)
5718 fprintf (dump_file, "\nIPA decision stage:\n\n");
5719
5720 for (i = topo->nnodes - 1; i >= 0; i--)
5721 {
5722 struct cgraph_node *node = topo->order[i];
5723 bool change = false, iterate = true;
5724
5725 while (iterate)
5726 {
5727 struct cgraph_node *v;
5728 iterate = false;
5729 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
5730 if (v->has_gimple_body_p ()
5731 && ipcp_versionable_function_p (v))
5732 iterate |= decide_whether_version_node (v);
5733
5734 change |= iterate;
5735 }
5736 if (change)
5737 identify_dead_nodes (node);
5738 }
5739 }
5740
5741 /* Look up all the bits information that we have discovered and copy it over
5742 to the transformation summary. */
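/* As a purely illustrative example of the stored representation: value = 0x8
   together with mask = 0x3 would describe a parameter whose two least
   significant bits are unknown while all remaining bits are known to equal
   those of value, i.e. a value of 8, 9, 10 or 11.  */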
5743
5744 static void
5745 ipcp_store_bits_results (void)
5746 {
5747 cgraph_node *node;
5748
5749 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
5750 {
5751 ipa_node_params *info = IPA_NODE_REF (node);
5752 bool dumped_sth = false;
5753 bool found_useful_result = false;
5754
5755 if (!opt_for_fn (node->decl, flag_ipa_bit_cp) || !info)
5756 {
5757 if (dump_file)
	  fprintf (dump_file, "Not considering %s for ipa bitwise propagation"
			      "; -fipa-bit-cp disabled.\n",
5760 node->dump_name ());
5761 continue;
5762 }
5763
5764 if (info->ipcp_orig_node)
5765 info = IPA_NODE_REF (info->ipcp_orig_node);
5766 if (!info->lattices)
5767 /* Newly expanded artificial thunks do not have lattices. */
5768 continue;
5769
5770 unsigned count = ipa_get_param_count (info);
5771 for (unsigned i = 0; i < count; i++)
5772 {
5773 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
5774 if (plats->bits_lattice.constant_p ())
5775 {
5776 found_useful_result = true;
5777 break;
5778 }
5779 }
5780
5781 if (!found_useful_result)
5782 continue;
5783
5784 ipcp_transformation_initialize ();
5785 ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
5786 vec_safe_reserve_exact (ts->bits, count);
5787
5788 for (unsigned i = 0; i < count; i++)
5789 {
5790 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
5791 ipa_bits *jfbits;
5792
5793 if (plats->bits_lattice.constant_p ())
5794 jfbits
5795 = ipa_get_ipa_bits_for_value (plats->bits_lattice.get_value (),
5796 plats->bits_lattice.get_mask ());
5797 else
5798 jfbits = NULL;
5799
5800 ts->bits->quick_push (jfbits);
5801 if (!dump_file || !jfbits)
5802 continue;
5803 if (!dumped_sth)
5804 {
5805 fprintf (dump_file, "Propagated bits info for function %s:\n",
5806 node->dump_name ());
5807 dumped_sth = true;
5808 }
5809 fprintf (dump_file, " param %i: value = ", i);
5810 print_hex (jfbits->value, dump_file);
5811 fprintf (dump_file, ", mask = ");
5812 print_hex (jfbits->mask, dump_file);
5813 fprintf (dump_file, "\n");
5814 }
5815 }
5816 }
5817
5818 /* Look up all VR information that we have discovered and copy it over
5819 to the transformation summary. */
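/* For instance (hypothetical): a parameter that every caller is known to
   pass a value from [0, 100] would be stored as an ipa_vr with known = true,
   type = VR_RANGE, min = 0 and max = 100, to be applied to the parameter
   during the transformation phase.  */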
5820
5821 static void
5822 ipcp_store_vr_results (void)
5823 {
5824 cgraph_node *node;
5825
5826 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
5827 {
5828 ipa_node_params *info = IPA_NODE_REF (node);
5829 bool found_useful_result = false;
5830
5831 if (!info || !opt_for_fn (node->decl, flag_ipa_vrp))
5832 {
5833 if (dump_file)
	  fprintf (dump_file, "Not considering %s for VR discovery "
		   "and propagation; -fipa-vrp disabled.\n",
5836 node->dump_name ());
5837 continue;
5838 }
5839
5840 if (info->ipcp_orig_node)
5841 info = IPA_NODE_REF (info->ipcp_orig_node);
5842 if (!info->lattices)
5843 /* Newly expanded artificial thunks do not have lattices. */
5844 continue;
5845
5846 unsigned count = ipa_get_param_count (info);
5847 for (unsigned i = 0; i < count; i++)
5848 {
5849 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
5850 if (!plats->m_value_range.bottom_p ()
5851 && !plats->m_value_range.top_p ())
5852 {
5853 found_useful_result = true;
5854 break;
5855 }
5856 }
5857 if (!found_useful_result)
5858 continue;
5859
5860 ipcp_transformation_initialize ();
5861 ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
5862 vec_safe_reserve_exact (ts->m_vr, count);
5863
5864 for (unsigned i = 0; i < count; i++)
5865 {
5866 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
5867 ipa_vr vr;
5868
5869 if (!plats->m_value_range.bottom_p ()
5870 && !plats->m_value_range.top_p ())
5871 {
5872 vr.known = true;
5873 vr.type = plats->m_value_range.m_vr.kind ();
5874 vr.min = wi::to_wide (plats->m_value_range.m_vr.min ());
5875 vr.max = wi::to_wide (plats->m_value_range.m_vr.max ());
5876 }
5877 else
5878 {
5879 vr.known = false;
5880 vr.type = VR_VARYING;
5881 vr.min = vr.max = wi::zero (INT_TYPE_SIZE);
5882 }
5883 ts->m_vr->quick_push (vr);
5884 }
5885 }
5886 }
5887
5888 /* The IPCP driver. */
5889
5890 static unsigned int
5891 ipcp_driver (void)
5892 {
5893 class ipa_topo_info topo;
5894
5895 if (edge_clone_summaries == NULL)
5896 edge_clone_summaries = new edge_clone_summary_t (symtab);
5897
5898 ipa_check_create_node_params ();
5899 ipa_check_create_edge_args ();
5900 clone_num_suffixes = new hash_map<const char *, unsigned>;
5901
5902 if (dump_file)
5903 {
5904 fprintf (dump_file, "\nIPA structures before propagation:\n");
5905 if (dump_flags & TDF_DETAILS)
5906 ipa_print_all_params (dump_file);
5907 ipa_print_all_jump_functions (dump_file);
5908 }
5909
5910 /* Topological sort. */
5911 build_toporder_info (&topo);
5912 /* Do the interprocedural propagation. */
5913 ipcp_propagate_stage (&topo);
5914 /* Decide what constant propagation and cloning should be performed. */
5915 ipcp_decision_stage (&topo);
5916 /* Store results of bits propagation. */
5917 ipcp_store_bits_results ();
5918 /* Store results of value range propagation. */
5919 ipcp_store_vr_results ();
5920
5921 /* Free all IPCP structures. */
5922 delete clone_num_suffixes;
5923 free_toporder_info (&topo);
5924 delete edge_clone_summaries;
5925 edge_clone_summaries = NULL;
5926 ipa_free_all_structures_after_ipa_cp ();
5927 if (dump_file)
5928 fprintf (dump_file, "\nIPA constant propagation end\n");
5929 return 0;
5930 }
5931
5932 /* Initialization and computation of IPCP data structures. This is the initial
5933 intraprocedural analysis of functions, which gathers information to be
5934 propagated later on. */
5935
5936 static void
5937 ipcp_generate_summary (void)
5938 {
5939 struct cgraph_node *node;
5940
5941 if (dump_file)
5942 fprintf (dump_file, "\nIPA constant propagation start:\n");
5943 ipa_register_cgraph_hooks ();
5944
5945 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
5946 ipa_analyze_node (node);
5947 }
5948
5949 namespace {
5950
5951 const pass_data pass_data_ipa_cp =
5952 {
5953 IPA_PASS, /* type */
5954 "cp", /* name */
5955 OPTGROUP_NONE, /* optinfo_flags */
5956 TV_IPA_CONSTANT_PROP, /* tv_id */
5957 0, /* properties_required */
5958 0, /* properties_provided */
5959 0, /* properties_destroyed */
5960 0, /* todo_flags_start */
5961 ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
5962 };
5963
5964 class pass_ipa_cp : public ipa_opt_pass_d
5965 {
5966 public:
5967 pass_ipa_cp (gcc::context *ctxt)
5968 : ipa_opt_pass_d (pass_data_ipa_cp, ctxt,
5969 ipcp_generate_summary, /* generate_summary */
5970 NULL, /* write_summary */
5971 NULL, /* read_summary */
		    ipcp_write_transformation_summaries,
		    /* write_optimization_summary */
		    ipcp_read_transformation_summaries,
		    /* read_optimization_summary */
5976 NULL, /* stmt_fixup */
5977 0, /* function_transform_todo_flags_start */
5978 ipcp_transform_function, /* function_transform */
5979 NULL) /* variable_transform */
5980 {}
5981
5982 /* opt_pass methods: */
5983 virtual bool gate (function *)
5984 {
5985 /* FIXME: We should remove the optimize check after we ensure we never run
5986 IPA passes when not optimizing. */
5987 return (flag_ipa_cp && optimize) || in_lto_p;
5988 }
5989
5990 virtual unsigned int execute (function *) { return ipcp_driver (); }
5991
5992 }; // class pass_ipa_cp
5993
5994 } // anon namespace
5995
5996 ipa_opt_pass_d *
5997 make_pass_ipa_cp (gcc::context *ctxt)
5998 {
5999 return new pass_ipa_cp (ctxt);
6000 }
6001
6002 /* Reset all state within ipa-cp.c so that we can rerun the compiler
6003 within the same process. For use by toplev::finalize. */
6004
6005 void
6006 ipa_cp_c_finalize (void)
6007 {
6008 max_count = profile_count::uninitialized ();
6009 overall_size = 0;
6010 orig_overall_size = 0;
6011 ipcp_free_transformation_sum ();
6012 }
6013