1 /* Interprocedural constant propagation
2 Copyright (C) 2005-2022 Free Software Foundation, Inc.
3
4 Contributed by Razya Ladelsky <RAZYA@il.ibm.com> and Martin Jambor
5 <mjambor@suse.cz>
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 /* Interprocedural constant propagation (IPA-CP).
24
25 The goal of this transformation is to
26
27 1) discover functions which are always invoked with some arguments with the
28 same known constant values and modify the functions so that the
29 subsequent optimizations can take advantage of the knowledge, and
30
31 2) partial specialization - create specialized versions of functions
32 transformed in this way if some parameters are known constants only in
33 certain contexts but the estimated tradeoff between speedup and code-size
34 cost is deemed good.
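
   For example (an illustrative sketch; the functions are hypothetical):

     static int f (int a) { return a * 2; }
     int g (void)  { return f (7); }
     int h (int x) { return f (x); }

   If every call passed 7, goal 1) would let subsequent optimizations fold
   the body of f to "return 14".  Because h passes an unknown value, goal 2)
   instead creates a clone of f specialized for a == 7 and redirects the
   call in g to it.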
35
36 The algorithm also propagates types and attempts to perform type-based
37 devirtualization. Types are propagated much like constants.
38
39 The algorithm basically consists of three stages. In the first, functions
40 are analyzed one at a time and jump functions are constructed for all known
41 call-sites. In the second phase, the pass propagates information from the
42 jump functions across the call to reveal what values are available at what
43 call sites, performs estimations of effects of known values on functions and
44 their callees, and finally decides what specialized extra versions should be
45 created. In the third, the special versions materialize and appropriate
46 calls are redirected.
47
48 The algorithm used is to a certain extent based on "Interprocedural Constant
49 Propagation", by David Callahan, Keith D Cooper, Ken Kennedy, Linda Torczon,
50 Comp86, pg 152-161 and "A Methodology for Procedure Cloning" by Keith D
51 Cooper, Mary W. Hall, and Ken Kennedy.
52
53
54 First stage - intraprocedural analysis
55 =======================================
56
57 This phase computes jump_function and modification flags.
58
59 A jump function for a call-site represents the values passed as actual
60 arguments of a given call-site. In principle, there are three types of
61 values:
62
63 Pass through - the caller's formal parameter is passed as an actual
64 argument, plus an operation on it can be performed.
65 Constant - a constant is passed as an actual argument.
66 Unknown - neither of the above.
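
   For instance, in the following hypothetical caller

     void callee (int a, int b, int c);
     void caller (int x)
     {
       callee (x + 4, 7, opaque ());
     }

   the first actual argument gives rise to a pass-through jump function
   (formal parameter 0 of caller combined with a PLUS_EXPR by 4), the second
   to a constant jump function (7) and the third, assuming opaque is some
   function whose result is unpredictable, to an unknown one.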
67
68 All jump function types are described in detail in ipa-prop.h, together with
69 the data structures that represent them and methods of accessing them.
70
71 ipcp_generate_summary() is the main function of the first stage.
72
73 Second stage - interprocedural analysis
74 ========================================
75
76 This stage is itself divided into two phases. In the first, we propagate
77 known values over the call graph, in the second, we make cloning decisions.
78 It uses a different algorithm from the one in Callahan's original paper.
79
80 First, we traverse the functions topologically from callers to callees and,
81 for each strongly connected component (SCC), we propagate constants
82 according to previously computed jump functions. We also record what known
83 values depend on other known values and estimate local effects. Finally, we
84 propagate cumulative information about these effects from dependent values
85 to those on which they depend.
86
87 Second, we again traverse the call graph in the same topological order and
88 make clones for functions which we know are called with the same values in
89 all contexts and decide about extra specialized clones of functions just for
90 some contexts - these decisions are based on both local estimates and
91 cumulative estimates propagated from callees.
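
   As a hypothetical illustration of the order of processing:

     void c (int n);
     void b (int m) { c (m + 1); }
     void a (void)  { b (5); }

   the propagation phase settles the value 5 for the parameter of b before
   it derives the value 6 for the parameter of c from the pass-through jump
   function, and the cloning phase then decides for each of the two values
   whether specializing is worthwhile.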
92
93 ipcp_propagate_stage() and ipcp_decision_stage() together constitute the
94 second stage.
95
96 Third phase - materialization of clones, call statement updates.
97 ================================================================
98
99 This stage is currently performed by call graph code (mainly in cgraphunit.cc
100 and tree-inline.cc) according to instructions inserted to the call graph by
101 the second stage. */
102
103 #include "config.h"
104 #include "system.h"
105 #include "coretypes.h"
106 #include "backend.h"
107 #include "tree.h"
108 #include "gimple-expr.h"
109 #include "gimple.h"
110 #include "predict.h"
111 #include "alloc-pool.h"
112 #include "tree-pass.h"
113 #include "cgraph.h"
114 #include "diagnostic.h"
115 #include "fold-const.h"
116 #include "gimple-fold.h"
117 #include "symbol-summary.h"
118 #include "tree-vrp.h"
119 #include "ipa-prop.h"
120 #include "tree-pretty-print.h"
121 #include "tree-inline.h"
122 #include "ipa-fnsummary.h"
123 #include "ipa-utils.h"
124 #include "tree-ssa-ccp.h"
125 #include "stringpool.h"
126 #include "attribs.h"
127 #include "dbgcnt.h"
128 #include "symtab-clones.h"
129
130 template <typename valtype> class ipcp_value;
131
132 /* Describes a particular source for an IPA-CP value. */
133
134 template <typename valtype>
135 struct ipcp_value_source
136 {
137 public:
138 /* Aggregate offset of the source, negative if the source is the scalar value
139 of the argument itself. */
140 HOST_WIDE_INT offset;
141 /* The incoming edge that brought the value. */
142 cgraph_edge *cs;
143 /* If the jump function that resulted in this value was a pass-through or an
144 ancestor, this is the ipcp_value of the caller from which the described
145 value has been derived. Otherwise it is NULL. */
146 ipcp_value<valtype> *val;
147 /* Next pointer in a linked list of sources of a value. */
148 ipcp_value_source *next;
149 /* If the jump function that resulted in this value was a pass-through or an
150 ancestor, this is the index of the parameter of the caller the jump
151 function references. */
152 int index;
153 };
154
155 /* Common ancestor for all ipcp_value instantiations. */
156
157 class ipcp_value_base
158 {
159 public:
160 /* Time benefit that specializing the function for this value would bring
161 about in this function alone. */
162 sreal local_time_benefit;
163 /* Time benefit that specializing the function for this value can bring about
164 in its callees. */
165 sreal prop_time_benefit;
166 /* Size cost that specializing the function for this value would bring about
167 in this function alone. */
168 int local_size_cost;
169 /* Size cost that specializing the function for this value can bring about in
170 its callees. */
171 int prop_size_cost;
172
173 ipcp_value_base ()
174 : local_time_benefit (0), prop_time_benefit (0),
175 local_size_cost (0), prop_size_cost (0) {}
176 };
177
178 /* Describes one particular value stored in struct ipcp_lattice. */
179
180 template <typename valtype>
181 class ipcp_value : public ipcp_value_base
182 {
183 public:
184 /* The actual value for the given parameter. */
185 valtype value;
186 /* The list of sources from which this value originates. */
187 ipcp_value_source <valtype> *sources = nullptr;
188 /* Next pointers in a linked list of all values in a lattice. */
189 ipcp_value *next = nullptr;
190 /* Next pointers in a linked list of values in a strongly connected component
191 of values. */
192 ipcp_value *scc_next = nullptr;
193 /* Next pointers in a linked list of SCCs of values sorted topologically
194 according their sources. */
195 ipcp_value *topo_next = nullptr;
196 /* A specialized node created for this value, NULL if none has been (so far)
197 created. */
198 cgraph_node *spec_node = nullptr;
199 /* Depth first search number and low link for topological sorting of
200 values. */
201 int dfs = 0;
202 int low_link = 0;
203 /* SCC number to identify values which recursively feed into each other.
204 Values in the same SCC have the same SCC number. */
205 int scc_no = 0;
206 /* Non-zero if the value is generated from another value in the same lattice
207 for a self-recursive call; the actual number is how many times the
208 operation has been performed. In the unlikely event of the value being
209 present in two self-recursive value generation chains, it is the
210 maximum. */
211 unsigned self_recursion_generated_level = 0;
212 /* True if this value is currently on the topo-sort stack. */
213 bool on_stack = false;
214
215 void add_source (cgraph_edge *cs, ipcp_value *src_val, int src_idx,
216 HOST_WIDE_INT offset);
217
218 /* Return true if THIS value and O feed into each other. */
219
220 bool same_scc (const ipcp_value<valtype> *o)
221 {
222 return o->scc_no == scc_no;
223 }
224
225 /* Return true if this value has been generated for a self-recursive call as
226 a result of an arithmetic pass-through jump-function acting on a value in
227 the same lattice. */
228
229 bool self_recursion_generated_p ()
230 {
231 return self_recursion_generated_level > 0;
232 }
233 };
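
/* As a hypothetical illustration of self_recursion_generated_level, consider

     void f (int n)
     {
       ...
       f (n + 1);
     }

   called from elsewhere with the known constant 0.  The lattice for N may
   then also accumulate the generated values 1, 2, 3, ... with increasing
   self_recursion_generated_level, up to a parameter-controlled limit.  */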
234
235 /* Lattice describing potential values of a formal parameter of a function, or
236 a part of an aggregate. TOP is represented by a lattice with zero values
237 and with contains_variable and bottom flags cleared. BOTTOM is represented
238 by a lattice with the bottom flag set. In that case, values and
239 contains_variable flag should be disregarded. */
240
241 template <typename valtype>
242 struct ipcp_lattice
243 {
244 public:
245 /* The list of known values and types in this lattice. Note that values are
246 not deallocated if a lattice is set to bottom because there may be value
247 sources referencing them. */
248 ipcp_value<valtype> *values;
249 /* Number of known values and types in this lattice. */
250 int values_count;
251 /* The lattice contains a variable component (in addition to values). */
252 bool contains_variable;
253 /* The value of the lattice is bottom (i.e. variable and unusable for any
254 propagation). */
255 bool bottom;
256
257 inline bool is_single_const ();
258 inline bool set_to_bottom ();
259 inline bool set_contains_variable ();
260 bool add_value (valtype newval, cgraph_edge *cs,
261 ipcp_value<valtype> *src_val = NULL,
262 int src_idx = 0, HOST_WIDE_INT offset = -1,
263 ipcp_value<valtype> **val_p = NULL,
264 unsigned same_lat_gen_level = 0);
265 void print (FILE * f, bool dump_sources, bool dump_benefits);
266 };
267
268 /* Lattice of tree values with an offset to describe a part of an
269 aggregate. */
270
271 struct ipcp_agg_lattice : public ipcp_lattice<tree>
272 {
273 public:
274 /* Offset that is being described by this lattice. */
275 HOST_WIDE_INT offset;
276 /* Size so that we don't have to re-compute it every time we traverse the
277 list. Must correspond to TYPE_SIZE of all lat values. */
278 HOST_WIDE_INT size;
279 /* Next element of the linked list. */
280 struct ipcp_agg_lattice *next;
281 };
282
283 /* Lattice of known bits, only capable of holding one value.
284 Bitwise constant propagation propagates which bits of a
285 value are constant.
286 For example:
287 int f(int x)
288 {
289 return some_op (x);
290 }
291
292 int f1(int y)
293 {
294 if (cond)
295 return f (y & 0xff);
296 else
297 return f (y & 0xf);
298 }
299
300 In the above case, the param 'x' will always have all
301 the bits (except those in the least significant byte) set to 0.
302 Hence the mask of 'x' would be 0xff. The mask
303 reflects that the bits in the least significant byte are unknown.
304 The actual propagated value is given by m_value & ~m_mask. */
305
306 class ipcp_bits_lattice
307 {
308 public:
309 bool bottom_p () const { return m_lattice_val == IPA_BITS_VARYING; }
310 bool top_p () const { return m_lattice_val == IPA_BITS_UNDEFINED; }
311 bool constant_p () const { return m_lattice_val == IPA_BITS_CONSTANT; }
312 bool set_to_bottom ();
313 bool set_to_constant (widest_int, widest_int);
314 bool known_nonzero_p () const;
315
316 widest_int get_value () const { return m_value; }
317 widest_int get_mask () const { return m_mask; }
318
319 bool meet_with (ipcp_bits_lattice& other, unsigned, signop,
320 enum tree_code, tree, bool);
321
322 bool meet_with (widest_int, widest_int, unsigned);
323
324 void print (FILE *);
325
326 private:
327 enum { IPA_BITS_UNDEFINED, IPA_BITS_CONSTANT, IPA_BITS_VARYING } m_lattice_val;
328
329 /* Similar to ccp_lattice_t, mask represents which bits of value are constant.
330 If a bit in mask is set to 0, then the corresponding bit in
331 value is known to be constant. */
332 widest_int m_value, m_mask;
333
334 bool meet_with_1 (widest_int, widest_int, unsigned, bool);
335 void get_value_and_mask (tree, widest_int *, widest_int *);
336 };
337
338 /* Lattice of value ranges. */
339
340 class ipcp_vr_lattice
341 {
342 public:
343 value_range m_vr;
344
345 inline bool bottom_p () const;
346 inline bool top_p () const;
347 inline bool set_to_bottom ();
348 bool meet_with (const value_range *p_vr);
349 bool meet_with (const ipcp_vr_lattice &other);
350 void init () { gcc_assert (m_vr.undefined_p ()); }
351 void print (FILE * f);
352
353 private:
354 bool meet_with_1 (const value_range *other_vr);
355 };
356
357 /* Structure containing lattices for a parameter itself and for pieces of
358 aggregates that are passed in the parameter or by a reference in a
359 parameter, plus some other useful flags. */
360
361 class ipcp_param_lattices
362 {
363 public:
364 /* Lattice describing the value of the parameter itself. */
365 ipcp_lattice<tree> itself;
366 /* Lattice describing the polymorphic contexts of a parameter. */
367 ipcp_lattice<ipa_polymorphic_call_context> ctxlat;
368 /* Lattices describing aggregate parts. */
369 ipcp_agg_lattice *aggs;
370 /* Lattice describing known bits. */
371 ipcp_bits_lattice bits_lattice;
372 /* Lattice describing value range. */
373 ipcp_vr_lattice m_value_range;
374 /* Number of aggregate lattices. */
375 int aggs_count;
376 /* True if aggregate data were passed by reference (as opposed to by
377 value). */
378 bool aggs_by_ref;
379 /* All aggregate lattices contain a variable component (in addition to
380 values). */
381 bool aggs_contain_variable;
382 /* The value of all aggregate lattices is bottom (i.e. variable and unusable
383 for any propagation). */
384 bool aggs_bottom;
385
386 /* There is a virtual call based on this parameter. */
387 bool virt_call;
388 };
389
390 /* Allocation pools for values and their sources in ipa-cp. */
391
392 object_allocator<ipcp_value<tree> > ipcp_cst_values_pool
393 ("IPA-CP constant values");
394
395 object_allocator<ipcp_value<ipa_polymorphic_call_context> >
396 ipcp_poly_ctx_values_pool ("IPA-CP polymorphic contexts");
397
398 object_allocator<ipcp_value_source<tree> > ipcp_sources_pool
399 ("IPA-CP value sources");
400
401 object_allocator<ipcp_agg_lattice> ipcp_agg_lattice_pool
402 ("IPA_CP aggregate lattices");
403
404 /* Base count to use in heuristics when using profile feedback. */
405
406 static profile_count base_count;
407
408 /* Original overall size of the program. */
409
410 static long overall_size, orig_overall_size;
411
412 /* Node name to unique clone suffix number map. */
413 static hash_map<const char *, unsigned> *clone_num_suffixes;
414
415 /* Return the param lattices structure corresponding to the Ith formal
416 parameter of the function described by INFO. */
417 static inline class ipcp_param_lattices *
418 ipa_get_parm_lattices (class ipa_node_params *info, int i)
419 {
420 gcc_assert (i >= 0 && i < ipa_get_param_count (info));
421 gcc_checking_assert (!info->ipcp_orig_node);
422 gcc_checking_assert (info->lattices);
423 return &(info->lattices[i]);
424 }
425
426 /* Return the lattice corresponding to the scalar value of the Ith formal
427 parameter of the function described by INFO. */
428 static inline ipcp_lattice<tree> *
429 ipa_get_scalar_lat (class ipa_node_params *info, int i)
430 {
431 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
432 return &plats->itself;
433 }
434
435 /* Return the lattice describing the polymorphic contexts of the Ith formal
436 parameter of the function described by INFO. */
437 static inline ipcp_lattice<ipa_polymorphic_call_context> *
438 ipa_get_poly_ctx_lat (class ipa_node_params *info, int i)
439 {
440 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
441 return &plats->ctxlat;
442 }
443
444 /* Return whether LAT is a lattice with a single constant and without an
445 undefined value. */
446
447 template <typename valtype>
448 inline bool
449 ipcp_lattice<valtype>::is_single_const ()
450 {
451 if (bottom || contains_variable || values_count != 1)
452 return false;
453 else
454 return true;
455 }
456
457 /* Print V which is extracted from a value in a lattice to F. */
458
459 static void
460 print_ipcp_constant_value (FILE * f, tree v)
461 {
462 if (TREE_CODE (v) == ADDR_EXPR
463 && TREE_CODE (TREE_OPERAND (v, 0)) == CONST_DECL)
464 {
465 fprintf (f, "& ");
466 print_generic_expr (f, DECL_INITIAL (TREE_OPERAND (v, 0)));
467 }
468 else
469 print_generic_expr (f, v);
470 }
471
472 /* Print V which is extracted from a value in a lattice to F. */
473
474 static void
475 print_ipcp_constant_value (FILE * f, ipa_polymorphic_call_context v)
476 {
477 v.dump(f, false);
478 }
479
480 /* Print a lattice LAT to F. */
481
482 template <typename valtype>
483 void
484 ipcp_lattice<valtype>::print (FILE * f, bool dump_sources, bool dump_benefits)
485 {
486 ipcp_value<valtype> *val;
487 bool prev = false;
488
489 if (bottom)
490 {
491 fprintf (f, "BOTTOM\n");
492 return;
493 }
494
495 if (!values_count && !contains_variable)
496 {
497 fprintf (f, "TOP\n");
498 return;
499 }
500
501 if (contains_variable)
502 {
503 fprintf (f, "VARIABLE");
504 prev = true;
505 if (dump_benefits)
506 fprintf (f, "\n");
507 }
508
509 for (val = values; val; val = val->next)
510 {
511 if (dump_benefits && prev)
512 fprintf (f, " ");
513 else if (!dump_benefits && prev)
514 fprintf (f, ", ");
515 else
516 prev = true;
517
518 print_ipcp_constant_value (f, val->value);
519
520 if (dump_sources)
521 {
522 ipcp_value_source<valtype> *s;
523
524 if (val->self_recursion_generated_p ())
525 fprintf (f, " [self_gen(%i), from:",
526 val->self_recursion_generated_level);
527 else
528 fprintf (f, " [scc: %i, from:", val->scc_no);
529 for (s = val->sources; s; s = s->next)
530 fprintf (f, " %i(%f)", s->cs->caller->order,
531 s->cs->sreal_frequency ().to_double ());
532 fprintf (f, "]");
533 }
534
535 if (dump_benefits)
536 fprintf (f, " [loc_time: %g, loc_size: %i, "
537 "prop_time: %g, prop_size: %i]\n",
538 val->local_time_benefit.to_double (), val->local_size_cost,
539 val->prop_time_benefit.to_double (), val->prop_size_cost);
540 }
541 if (!dump_benefits)
542 fprintf (f, "\n");
543 }
544
545 void
546 ipcp_bits_lattice::print (FILE *f)
547 {
548 if (top_p ())
549 fprintf (f, " Bits unknown (TOP)\n");
550 else if (bottom_p ())
551 fprintf (f, " Bits unusable (BOTTOM)\n");
552 else
553 {
554 fprintf (f, " Bits: value = "); print_hex (get_value (), f);
555 fprintf (f, ", mask = "); print_hex (get_mask (), f);
556 fprintf (f, "\n");
557 }
558 }
559
560 /* Print value range lattice to F. */
561
562 void
563 ipcp_vr_lattice::print (FILE * f)
564 {
565 dump_value_range (f, &m_vr);
566 }
567
568 /* Print all ipcp_lattices of all functions to F. */
569
570 static void
571 print_all_lattices (FILE * f, bool dump_sources, bool dump_benefits)
572 {
573 struct cgraph_node *node;
574 int i, count;
575
576 fprintf (f, "\nLattices:\n");
577 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
578 {
579 class ipa_node_params *info;
580
581 info = ipa_node_params_sum->get (node);
582 /* Skip unoptimized functions and constprop clones since we don't make
583 lattices for them. */
584 if (!info || info->ipcp_orig_node)
585 continue;
586 fprintf (f, " Node: %s:\n", node->dump_name ());
587 count = ipa_get_param_count (info);
588 for (i = 0; i < count; i++)
589 {
590 struct ipcp_agg_lattice *aglat;
591 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
592 fprintf (f, " param [%d]: ", i);
593 plats->itself.print (f, dump_sources, dump_benefits);
594 fprintf (f, " ctxs: ");
595 plats->ctxlat.print (f, dump_sources, dump_benefits);
596 plats->bits_lattice.print (f);
597 fprintf (f, " ");
598 plats->m_value_range.print (f);
599 fprintf (f, "\n");
600 if (plats->virt_call)
601 fprintf (f, " virt_call flag set\n");
602
603 if (plats->aggs_bottom)
604 {
605 fprintf (f, " AGGS BOTTOM\n");
606 continue;
607 }
608 if (plats->aggs_contain_variable)
609 fprintf (f, " AGGS VARIABLE\n");
610 for (aglat = plats->aggs; aglat; aglat = aglat->next)
611 {
612 fprintf (f, " %soffset " HOST_WIDE_INT_PRINT_DEC ": ",
613 plats->aggs_by_ref ? "ref " : "", aglat->offset);
614 aglat->print (f, dump_sources, dump_benefits);
615 }
616 }
617 }
618 }
619
620 /* Determine whether it is at all technically possible to create clones of NODE
621 and store this information in the ipa_node_params structure associated
622 with NODE. */
623
624 static void
625 determine_versionability (struct cgraph_node *node,
626 class ipa_node_params *info)
627 {
628 const char *reason = NULL;
629
630 /* There are a number of generic reasons functions cannot be versioned. We
631 also cannot remove parameters if there are type attributes such as fnspec
632 present. */
633 if (node->alias || node->thunk)
634 reason = "alias or thunk";
635 else if (!node->versionable)
636 reason = "not a tree_versionable_function";
637 else if (node->get_availability () <= AVAIL_INTERPOSABLE)
638 reason = "insufficient body availability";
639 else if (!opt_for_fn (node->decl, optimize)
640 || !opt_for_fn (node->decl, flag_ipa_cp))
641 reason = "non-optimized function";
642 else if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (node->decl)))
643 {
644 /* Ideally we should clone the SIMD clones themselves and create
645 vector copies of them, so IPA-cp and SIMD clones can happily
646 coexist, but that may not be worth the effort. */
647 reason = "function has SIMD clones";
648 }
649 else if (lookup_attribute ("target_clones", DECL_ATTRIBUTES (node->decl)))
650 {
651 /* Ideally we should clone the target clones themselves and create
652 copies of them, so IPA-cp and target clones can happily
653 coexist, but that may not be worth the effort. */
654 reason = "function target_clones attribute";
655 }
656 /* Don't clone decls local to a comdat group; it breaks, and for C++
657 decloned constructors inlining is always better anyway. */
658 else if (node->comdat_local_p ())
659 reason = "comdat-local function";
660 else if (node->calls_comdat_local)
661 {
662 /* TODO: call is versionable if we make sure that all
663 callers are inside of a comdat group. */
664 reason = "calls comdat-local function";
665 }
666
667 /* Functions calling BUILT_IN_VA_ARG_PACK and BUILT_IN_VA_ARG_PACK_LEN
668 work only when inlined. Cloning them may still lead to better code
669 because ipa-cp will not give up on cloning further. If the function is
670 external, however, this leads to wrong code because we may end up producing
671 an offline copy of the function. */
672 if (DECL_EXTERNAL (node->decl))
673 for (cgraph_edge *edge = node->callees; !reason && edge;
674 edge = edge->next_callee)
675 if (fndecl_built_in_p (edge->callee->decl, BUILT_IN_NORMAL))
676 {
677 if (DECL_FUNCTION_CODE (edge->callee->decl) == BUILT_IN_VA_ARG_PACK)
678 reason = "external function which calls va_arg_pack";
679 if (DECL_FUNCTION_CODE (edge->callee->decl)
680 == BUILT_IN_VA_ARG_PACK_LEN)
681 reason = "external function which calls va_arg_pack_len";
682 }
683
684 if (reason && dump_file && !node->alias && !node->thunk)
685 fprintf (dump_file, "Function %s is not versionable, reason: %s.\n",
686 node->dump_name (), reason);
687
688 info->versionable = (reason == NULL);
689 }
690
691 /* Return true if it is at all technically possible to create clones of
692 NODE. */
693
694 static bool
695 ipcp_versionable_function_p (struct cgraph_node *node)
696 {
697 ipa_node_params *info = ipa_node_params_sum->get (node);
698 return info && info->versionable;
699 }
700
701 /* Structure holding accumulated information about callers of a node. */
702
703 struct caller_statistics
704 {
705 /* If requested (see below), self-recursive call counts are summed into this
706 field. */
707 profile_count rec_count_sum;
708 /* The sum of all ipa counts of all the other (non-recursive) calls. */
709 profile_count count_sum;
710 /* Sum of all frequencies for all calls. */
711 sreal freq_sum;
712 /* Number of calls and hot calls respectively. */
713 int n_calls, n_hot_calls;
714 /* If ITSELF is set, also count the number of non-self-recursive
715 calls. */
716 int n_nonrec_calls;
717 /* If non-NULL, this is the node itself and calls from it should have their
718 counts included in rec_count_sum and not count_sum. */
719 cgraph_node *itself;
720 };
721
722 /* Initialize fields of STATS to zeroes and optionally set ITSELF so that
723 counts of self-recursive calls are accumulated separately. */
724
725 static inline void
726 init_caller_stats (caller_statistics *stats, cgraph_node *itself = NULL)
727 {
728 stats->rec_count_sum = profile_count::zero ();
729 stats->count_sum = profile_count::zero ();
730 stats->n_calls = 0;
731 stats->n_hot_calls = 0;
732 stats->n_nonrec_calls = 0;
733 stats->freq_sum = 0;
734 stats->itself = itself;
735 }
736
737 /* Worker callback of cgraph_for_node_and_aliases accumulating statistics of
738 non-thunk incoming edges to NODE. */
739
740 static bool
741 gather_caller_stats (struct cgraph_node *node, void *data)
742 {
743 struct caller_statistics *stats = (struct caller_statistics *) data;
744 struct cgraph_edge *cs;
745
746 for (cs = node->callers; cs; cs = cs->next_caller)
747 if (!cs->caller->thunk)
748 {
749 ipa_node_params *info = ipa_node_params_sum->get (cs->caller);
750 if (info && info->node_dead)
751 continue;
752
753 if (cs->count.ipa ().initialized_p ())
754 {
755 if (stats->itself && stats->itself == cs->caller)
756 stats->rec_count_sum += cs->count.ipa ();
757 else
758 stats->count_sum += cs->count.ipa ();
759 }
760 stats->freq_sum += cs->sreal_frequency ();
761 stats->n_calls++;
762 if (stats->itself && stats->itself != cs->caller)
763 stats->n_nonrec_calls++;
764
765 if (cs->maybe_hot_p ())
766 stats->n_hot_calls ++;
767 }
768 return false;
769
770 }
771
772 /* Return true if this NODE is a viable candidate for cloning. */
773
774 static bool
775 ipcp_cloning_candidate_p (struct cgraph_node *node)
776 {
777 struct caller_statistics stats;
778
779 gcc_checking_assert (node->has_gimple_body_p ());
780
781 if (!opt_for_fn (node->decl, flag_ipa_cp_clone))
782 {
783 if (dump_file)
784 fprintf (dump_file, "Not considering %s for cloning; "
785 "-fipa-cp-clone disabled.\n",
786 node->dump_name ());
787 return false;
788 }
789
790 if (node->optimize_for_size_p ())
791 {
792 if (dump_file)
793 fprintf (dump_file, "Not considering %s for cloning; "
794 "optimizing it for size.\n",
795 node->dump_name ());
796 return false;
797 }
798
799 init_caller_stats (&stats);
800 node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats, false);
801
802 if (ipa_size_summaries->get (node)->self_size < stats.n_calls)
803 {
804 if (dump_file)
805 fprintf (dump_file, "Considering %s for cloning; code might shrink.\n",
806 node->dump_name ());
807 return true;
808 }
809
810 /* When profile is available and function is hot, propagate into it even if
811 calls seem cold; constant propagation can improve the function's speed
812 significantly. */
813 if (stats.count_sum > profile_count::zero ()
814 && node->count.ipa ().initialized_p ())
815 {
816 if (stats.count_sum > node->count.ipa ().apply_scale (90, 100))
817 {
818 if (dump_file)
819 fprintf (dump_file, "Considering %s for cloning; "
820 "usually called directly.\n",
821 node->dump_name ());
822 return true;
823 }
824 }
825 if (!stats.n_hot_calls)
826 {
827 if (dump_file)
828 fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n",
829 node->dump_name ());
830 return false;
831 }
832 if (dump_file)
833 fprintf (dump_file, "Considering %s for cloning.\n",
834 node->dump_name ());
835 return true;
836 }
837
838 template <typename valtype>
839 class value_topo_info
840 {
841 public:
842 /* Head of the linked list of topologically sorted values. */
843 ipcp_value<valtype> *values_topo;
844 /* Stack for creating SCCs, represented by a linked list too. */
845 ipcp_value<valtype> *stack;
846 /* Counter driving the algorithm in add_val_to_toposort. */
847 int dfs_counter;
848
849 value_topo_info () : values_topo (NULL), stack (NULL), dfs_counter (0)
850 {}
851 void add_val (ipcp_value<valtype> *cur_val);
852 void propagate_effects ();
853 };
854
855 /* Arrays representing a topological ordering of call graph nodes and a stack
856 of nodes used during constant propagation and also data required to perform
857 topological sort of values and propagation of benefits in the determined
858 order. */
859
860 class ipa_topo_info
861 {
862 public:
863 /* Array with obtained topological order of cgraph nodes. */
864 struct cgraph_node **order;
865 /* Stack of cgraph nodes used during propagation within SCC until all values
866 in the SCC stabilize. */
867 struct cgraph_node **stack;
868 int nnodes, stack_top;
869
870 value_topo_info<tree> constants;
871 value_topo_info<ipa_polymorphic_call_context> contexts;
872
873 ipa_topo_info () : order(NULL), stack(NULL), nnodes(0), stack_top(0),
874 constants ()
875 {}
876 };
877
878 /* Skip edges from and to nodes without ipa_cp enabled.
879 Ignore unavailable symbols. */
880
881 static bool
882 ignore_edge_p (cgraph_edge *e)
883 {
884 enum availability avail;
885 cgraph_node *ultimate_target
886 = e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);
887
888 return (avail <= AVAIL_INTERPOSABLE
889 || !opt_for_fn (ultimate_target->decl, optimize)
890 || !opt_for_fn (ultimate_target->decl, flag_ipa_cp));
891 }
892
893 /* Allocate the arrays in TOPO and topologically sort the nodes into order. */
894
895 static void
896 build_toporder_info (class ipa_topo_info *topo)
897 {
898 topo->order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
899 topo->stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
900
901 gcc_checking_assert (topo->stack_top == 0);
902 topo->nnodes = ipa_reduced_postorder (topo->order, true,
903 ignore_edge_p);
904 }
905
906 /* Free information about strongly connected components and the arrays in
907 TOPO. */
908
909 static void
910 free_toporder_info (class ipa_topo_info *topo)
911 {
912 ipa_free_postorder_info ();
913 free (topo->order);
914 free (topo->stack);
915 }
916
917 /* Add NODE to the stack in TOPO, unless it is already there. */
918
919 static inline void
920 push_node_to_stack (class ipa_topo_info *topo, struct cgraph_node *node)
921 {
922 ipa_node_params *info = ipa_node_params_sum->get (node);
923 if (info->node_enqueued)
924 return;
925 info->node_enqueued = 1;
926 topo->stack[topo->stack_top++] = node;
927 }
928
929 /* Pop a node from the stack in TOPO and return it or return NULL if the stack
930 is empty. */
931
932 static struct cgraph_node *
933 pop_node_from_stack (class ipa_topo_info *topo)
934 {
935 if (topo->stack_top)
936 {
937 struct cgraph_node *node;
938 topo->stack_top--;
939 node = topo->stack[topo->stack_top];
940 ipa_node_params_sum->get (node)->node_enqueued = 0;
941 return node;
942 }
943 else
944 return NULL;
945 }
946
947 /* Set lattice LAT to bottom and return true if it previously was not set as
948 such. */
949
950 template <typename valtype>
951 inline bool
952 ipcp_lattice<valtype>::set_to_bottom ()
953 {
954 bool ret = !bottom;
955 bottom = true;
956 return ret;
957 }
958
959 /* Mark lattice as containing an unknown value and return true if it previously
960 was not marked as such. */
961
962 template <typename valtype>
963 inline bool
964 ipcp_lattice<valtype>::set_contains_variable ()
965 {
966 bool ret = !contains_variable;
967 contains_variable = true;
968 return ret;
969 }
970
971 /* Set all aggregate lattices in PLATS to bottom and return true if they were
972 not previously set as such. */
973
974 static inline bool
975 set_agg_lats_to_bottom (class ipcp_param_lattices *plats)
976 {
977 bool ret = !plats->aggs_bottom;
978 plats->aggs_bottom = true;
979 return ret;
980 }
981
982 /* Mark all aggregate lattices in PLATS as containing an unknown value and
983 return true if they were not previously marked as such. */
984
985 static inline bool
986 set_agg_lats_contain_variable (class ipcp_param_lattices *plats)
987 {
988 bool ret = !plats->aggs_contain_variable;
989 plats->aggs_contain_variable = true;
990 return ret;
991 }
992
993 bool
994 ipcp_vr_lattice::meet_with (const ipcp_vr_lattice &other)
995 {
996 return meet_with_1 (&other.m_vr);
997 }
998
999 /* Meet the current value of the lattice with the value range described by
1000 P_VR. */
1001
1002 bool
1003 ipcp_vr_lattice::meet_with (const value_range *p_vr)
1004 {
1005 return meet_with_1 (p_vr);
1006 }
1007
1008 /* Meet the current value of the lattice with the value range described by
1009 OTHER_VR. Return TRUE if anything changed. */
1010
1011 bool
1012 ipcp_vr_lattice::meet_with_1 (const value_range *other_vr)
1013 {
1014 if (bottom_p ())
1015 return false;
1016
1017 if (other_vr->varying_p ())
1018 return set_to_bottom ();
1019
1020 value_range save (m_vr);
1021 m_vr.union_ (other_vr);
1022 return !m_vr.equal_p (save);
1023 }
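
/* An illustrative note on the meet above: a lattice currently holding the
   range [1, 5] that meets an incoming range [10, 12] is widened, which with
   the legacy value_range representation typically yields [1, 12], while
   meeting with a VARYING range drops the lattice straight to BOTTOM.  */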
1024
1025 /* Return true if value range information in the lattice is yet unknown. */
1026
1027 bool
1028 ipcp_vr_lattice::top_p () const
1029 {
1030 return m_vr.undefined_p ();
1031 }
1032
1033 /* Return true if value range information in the lattice is known to be
1034 unusable. */
1035
1036 bool
1037 ipcp_vr_lattice::bottom_p () const
1038 {
1039 return m_vr.varying_p ();
1040 }
1041
1042 /* Set value range information in the lattice to bottom. Return true if it
1043 previously was in a different state. */
1044
1045 bool
1046 ipcp_vr_lattice::set_to_bottom ()
1047 {
1048 if (m_vr.varying_p ())
1049 return false;
1050 /* ?? We create all sorts of VARYING ranges for floats, structures,
1051 and other types which we cannot handle as ranges. We should
1052 probably avoid handling them throughout the pass, but it's easier
1053 to create a sensible VARYING here and let the lattice
1054 propagate. */
1055 m_vr.set_varying (integer_type_node);
1056 return true;
1057 }
1058
1059 /* Set lattice value to bottom, if it already isn't the case. */
1060
1061 bool
1062 ipcp_bits_lattice::set_to_bottom ()
1063 {
1064 if (bottom_p ())
1065 return false;
1066 m_lattice_val = IPA_BITS_VARYING;
1067 m_value = 0;
1068 m_mask = -1;
1069 return true;
1070 }
1071
1072 /* Set to constant if it isn't already. Only meant to be called
1073 when switching state from TOP. */
1074
1075 bool
1076 ipcp_bits_lattice::set_to_constant (widest_int value, widest_int mask)
1077 {
1078 gcc_assert (top_p ());
1079 m_lattice_val = IPA_BITS_CONSTANT;
1080 m_value = wi::bit_and (wi::bit_not (mask), value);
1081 m_mask = mask;
1082 return true;
1083 }
1084
1085 /* Return true if any of the known bits are non-zero. */
1086
1087 bool
1088 ipcp_bits_lattice::known_nonzero_p () const
1089 {
1090 if (!constant_p ())
1091 return false;
1092 return wi::ne_p (wi::bit_and (wi::bit_not (m_mask), m_value), 0);
1093 }
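
/* For example (illustrative values), a lattice holding <value 0x5, mask 0x2>
   has bits 0 and 2 known to be one, so known_nonzero_p () is true, whereas
   <value 0x0, mask 0xff> has no known one bits and yields false.  */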
1094
1095 /* Convert OPERAND to value, mask form. */
1096
1097 void
1098 ipcp_bits_lattice::get_value_and_mask (tree operand, widest_int *valuep, widest_int *maskp)
1099 {
1102 if (TREE_CODE (operand) == INTEGER_CST)
1103 {
1104 *valuep = wi::to_widest (operand);
1105 *maskp = 0;
1106 }
1107 else
1108 {
1109 *valuep = 0;
1110 *maskp = -1;
1111 }
1112 }
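
/* E.g. (illustrative values) the INTEGER_CST 5 becomes <value 5, mask 0>,
   all bits known, whereas any non-constant operand becomes
   <value 0, mask -1>, i.e. all bits unknown.  */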
1113
1114 /* Meet operation, similar to ccp_lattice_meet: we xor the values, and where
1115 this->m_value and VALUE differ at the same bit position, we
1116 drop that bit to varying. Return true if the mask is changed.
1117 This function assumes that the lattice value is in CONSTANT state. If
1118 DROP_ALL_ONES, mask out any known bits with value one afterwards. */
1119
1120 bool
1121 ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask,
1122 unsigned precision, bool drop_all_ones)
1123 {
1124 gcc_assert (constant_p ());
1125
1126 widest_int old_mask = m_mask;
1127 m_mask = (m_mask | mask) | (m_value ^ value);
1128 if (drop_all_ones)
1129 m_mask |= m_value;
1130 m_value &= ~m_mask;
1131
1132 if (wi::sext (m_mask, precision) == -1)
1133 return set_to_bottom ();
1134
1135 return m_mask != old_mask;
1136 }
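
/* A worked instance of the rule above, using plain numbers for illustration:
   meeting <value 0x0f, mask 0x0> with <value 0x07, mask 0x0> gives

     new_mask  = (0x0 | 0x0) | (0x0f ^ 0x07) = 0x08
     new_value = 0x0f & ~0x08 = 0x07

   i.e. only bit 3, on which the two values disagree, becomes unknown.  */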
1137
1138 /* Meet the bits lattice with operand
1139 described by <value, mask, sgn, precision>. */
1140
1141 bool
1142 ipcp_bits_lattice::meet_with (widest_int value, widest_int mask,
1143 unsigned precision)
1144 {
1145 if (bottom_p ())
1146 return false;
1147
1148 if (top_p ())
1149 {
1150 if (wi::sext (mask, precision) == -1)
1151 return set_to_bottom ();
1152 return set_to_constant (value, mask);
1153 }
1154
1155 return meet_with_1 (value, mask, precision, false);
1156 }
1157
1158 /* Meet bits lattice with the result of bit_value_binop (other, operand)
1159 if CODE is a binary operation, or with bit_value_unop (other) if CODE is a
1160 unary op. In the case when CODE is NOP_EXPR, no adjustment is required. If
1161 DROP_ALL_ONES, mask out any known bits with value one afterwards. */
1162
1163 bool
1164 ipcp_bits_lattice::meet_with (ipcp_bits_lattice& other, unsigned precision,
1165 signop sgn, enum tree_code code, tree operand,
1166 bool drop_all_ones)
1167 {
1168 if (other.bottom_p ())
1169 return set_to_bottom ();
1170
1171 if (bottom_p () || other.top_p ())
1172 return false;
1173
1174 widest_int adjusted_value, adjusted_mask;
1175
1176 if (TREE_CODE_CLASS (code) == tcc_binary)
1177 {
1178 tree type = TREE_TYPE (operand);
1179 widest_int o_value, o_mask;
1180 get_value_and_mask (operand, &o_value, &o_mask);
1181
1182 bit_value_binop (code, sgn, precision, &adjusted_value, &adjusted_mask,
1183 sgn, precision, other.get_value (), other.get_mask (),
1184 TYPE_SIGN (type), TYPE_PRECISION (type), o_value, o_mask);
1185
1186 if (wi::sext (adjusted_mask, precision) == -1)
1187 return set_to_bottom ();
1188 }
1189
1190 else if (TREE_CODE_CLASS (code) == tcc_unary)
1191 {
1192 bit_value_unop (code, sgn, precision, &adjusted_value,
1193 &adjusted_mask, sgn, precision, other.get_value (),
1194 other.get_mask ());
1195
1196 if (wi::sext (adjusted_mask, precision) == -1)
1197 return set_to_bottom ();
1198 }
1199
1200 else
1201 return set_to_bottom ();
1202
1203 if (top_p ())
1204 {
1205 if (drop_all_ones)
1206 {
1207 adjusted_mask |= adjusted_value;
1208 adjusted_value &= ~adjusted_mask;
1209 }
1210 if (wi::sext (adjusted_mask, precision) == -1)
1211 return set_to_bottom ();
1212 return set_to_constant (adjusted_value, adjusted_mask);
1213 }
1214 else
1215 return meet_with_1 (adjusted_value, adjusted_mask, precision,
1216 drop_all_ones);
1217 }
1218
1219 /* Mark both aggregate and scalar lattices as containing an unknown variable,
1220 return true if any of them has not been marked as such so far. */
1221
1222 static inline bool
1223 set_all_contains_variable (class ipcp_param_lattices *plats)
1224 {
1225 bool ret;
1226 ret = plats->itself.set_contains_variable ();
1227 ret |= plats->ctxlat.set_contains_variable ();
1228 ret |= set_agg_lats_contain_variable (plats);
1229 ret |= plats->bits_lattice.set_to_bottom ();
1230 ret |= plats->m_value_range.set_to_bottom ();
1231 return ret;
1232 }
1233
1234 /* Worker of call_for_symbol_thunks_and_aliases, increment the integer DATA
1235 points to by the number of callers to NODE. */
1236
1237 static bool
1238 count_callers (cgraph_node *node, void *data)
1239 {
1240 int *caller_count = (int *) data;
1241
1242 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
1243 /* Local thunks can be handled transparently, but if the thunk cannot
1244 be optimized out, count it as a real use. */
1245 if (!cs->caller->thunk || !cs->caller->local)
1246 ++*caller_count;
1247 return false;
1248 }
1249
1250 /* Worker of call_for_symbol_thunks_and_aliases, it is supposed to be called on
1251 the one caller of some other node. Set the caller's corresponding flag. */
1252
1253 static bool
1254 set_single_call_flag (cgraph_node *node, void *)
1255 {
1256 cgraph_edge *cs = node->callers;
1257 /* Local thunks can be handled transparently, skip them. */
1258 while (cs && cs->caller->thunk && cs->caller->local)
1259 cs = cs->next_caller;
1260 if (cs)
1261 if (ipa_node_params* info = ipa_node_params_sum->get (cs->caller))
1262 {
1263 info->node_calling_single_call = true;
1264 return true;
1265 }
1266 return false;
1267 }
1268
1269 /* Initialize ipcp_lattices. */
1270
1271 static void
1272 initialize_node_lattices (struct cgraph_node *node)
1273 {
1274 ipa_node_params *info = ipa_node_params_sum->get (node);
1275 struct cgraph_edge *ie;
1276 bool disable = false, variable = false;
1277 int i;
1278
1279 gcc_checking_assert (node->has_gimple_body_p ());
1280
1281 if (!ipa_get_param_count (info))
1282 disable = true;
1283 else if (node->local)
1284 {
1285 int caller_count = 0;
1286 node->call_for_symbol_thunks_and_aliases (count_callers, &caller_count,
1287 true);
1288 gcc_checking_assert (caller_count > 0);
1289 if (caller_count == 1)
1290 node->call_for_symbol_thunks_and_aliases (set_single_call_flag,
1291 NULL, true);
1292 }
1293 else
1294 {
1295 /* When cloning is allowed, we can assume that externally visible
1296 functions are not called. We will compensate for this by cloning
1297 later. */
1298 if (ipcp_versionable_function_p (node)
1299 && ipcp_cloning_candidate_p (node))
1300 variable = true;
1301 else
1302 disable = true;
1303 }
1304
1305 if (dump_file && (dump_flags & TDF_DETAILS)
1306 && !node->alias && !node->thunk)
1307 {
1308 fprintf (dump_file, "Initializing lattices of %s\n",
1309 node->dump_name ());
1310 if (disable || variable)
1311 fprintf (dump_file, " Marking all lattices as %s\n",
1312 disable ? "BOTTOM" : "VARIABLE");
1313 }
1314
1315 auto_vec<bool, 16> surviving_params;
1316 bool pre_modified = false;
1317
1318 clone_info *cinfo = clone_info::get (node);
1319
1320 if (!disable && cinfo && cinfo->param_adjustments)
1321 {
1322 /* At the moment all IPA optimizations should use the number of
1323 parameters of the prevailing decl as the m_always_copy_start.
1324 Handling any other value would complicate the code below, so for the
1325 time being let's only assert it is so. */
1326 gcc_assert ((cinfo->param_adjustments->m_always_copy_start
1327 == ipa_get_param_count (info))
1328 || cinfo->param_adjustments->m_always_copy_start < 0);
1329
1330 pre_modified = true;
1331 cinfo->param_adjustments->get_surviving_params (&surviving_params);
1332
1333 if (dump_file && (dump_flags & TDF_DETAILS)
1334 && !node->alias && !node->thunk)
1335 {
1336 bool first = true;
1337 for (int j = 0; j < ipa_get_param_count (info); j++)
1338 {
1339 if (j < (int) surviving_params.length ()
1340 && surviving_params[j])
1341 continue;
1342 if (first)
1343 {
1344 fprintf (dump_file,
1345 " The following parameters are dead on arrival:");
1346 first = false;
1347 }
1348 fprintf (dump_file, " %u", j);
1349 }
1350 if (!first)
1351 fprintf (dump_file, "\n");
1352 }
1353 }
1354
1355 for (i = 0; i < ipa_get_param_count (info); i++)
1356 {
1357 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
1358 if (disable
1359 || !ipa_get_type (info, i)
1360 || (pre_modified && (surviving_params.length () <= (unsigned) i
1361 || !surviving_params[i])))
1362 {
1363 plats->itself.set_to_bottom ();
1364 plats->ctxlat.set_to_bottom ();
1365 set_agg_lats_to_bottom (plats);
1366 plats->bits_lattice.set_to_bottom ();
1367 plats->m_value_range.m_vr = value_range ();
1368 plats->m_value_range.set_to_bottom ();
1369 }
1370 else
1371 {
1372 plats->m_value_range.init ();
1373 if (variable)
1374 set_all_contains_variable (plats);
1375 }
1376 }
1377
1378 for (ie = node->indirect_calls; ie; ie = ie->next_callee)
1379 if (ie->indirect_info->polymorphic
1380 && ie->indirect_info->param_index >= 0)
1381 {
1382 gcc_checking_assert (ie->indirect_info->param_index >= 0);
1383 ipa_get_parm_lattices (info,
1384 ie->indirect_info->param_index)->virt_call = 1;
1385 }
1386 }
1387
1388 /* Return true if VALUE can be safely IPA-CP propagated to a parameter of type
1389 PARAM_TYPE. */
1390
1391 static bool
1392 ipacp_value_safe_for_type (tree param_type, tree value)
1393 {
1394 tree val_type = TREE_TYPE (value);
1395 if (param_type == val_type
1396 || useless_type_conversion_p (param_type, val_type)
1397 || fold_convertible_p (param_type, value))
1398 return true;
1399 else
1400 return false;
1401 }
1402
1403 /* Return true iff X and Y should be considered equal values by IPA-CP. */
1404
1405 static bool
1406 values_equal_for_ipcp_p (tree x, tree y)
1407 {
1408 gcc_checking_assert (x != NULL_TREE && y != NULL_TREE);
1409
1410 if (x == y)
1411 return true;
1412
1413 if (TREE_CODE (x) == ADDR_EXPR
1414 && TREE_CODE (y) == ADDR_EXPR
1415 && TREE_CODE (TREE_OPERAND (x, 0)) == CONST_DECL
1416 && TREE_CODE (TREE_OPERAND (y, 0)) == CONST_DECL)
1417 return operand_equal_p (DECL_INITIAL (TREE_OPERAND (x, 0)),
1418 DECL_INITIAL (TREE_OPERAND (y, 0)), 0);
1419 else
1420 return operand_equal_p (x, y, 0);
1421 }
1422
1423 /* Return the result of a (possibly arithmetic) operation on the constant
1424 value INPUT. OPERAND is the second operand for a binary operation. RES_TYPE is
1425 the type of the parameter to which the result is passed. Return
1426 NULL_TREE if that cannot be determined or be considered an
1427 interprocedural invariant. */
1428
1429 static tree
1430 ipa_get_jf_arith_result (enum tree_code opcode, tree input, tree operand,
1431 tree res_type)
1432 {
1433 tree res;
1434
1435 if (opcode == NOP_EXPR)
1436 return input;
1437 if (!is_gimple_ip_invariant (input))
1438 return NULL_TREE;
1439
1440 if (opcode == ASSERT_EXPR)
1441 {
1442 if (values_equal_for_ipcp_p (input, operand))
1443 return input;
1444 else
1445 return NULL_TREE;
1446 }
1447
1448 if (!res_type)
1449 {
1450 if (TREE_CODE_CLASS (opcode) == tcc_comparison)
1451 res_type = boolean_type_node;
1452 else if (expr_type_first_operand_type_p (opcode))
1453 res_type = TREE_TYPE (input);
1454 else
1455 return NULL_TREE;
1456 }
1457
1458 if (TREE_CODE_CLASS (opcode) == tcc_unary)
1459 res = fold_unary (opcode, res_type, input);
1460 else
1461 res = fold_binary (opcode, res_type, input, operand);
1462
1463 if (res && !is_gimple_ip_invariant (res))
1464 return NULL_TREE;
1465
1466 return res;
1467 }
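
/* As an illustrative example, a pass-through jump function with OPCODE
   PLUS_EXPR, OPERAND 4 and an integer RES_TYPE evaluates the known constant
   INPUT 16 to the interprocedural invariant 20, whereas a NOP_EXPR OPCODE
   simply returns INPUT unchanged.  */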
1468
1469 /* Return the result of a (possibly arithmetic) pass through jump function
1470 JFUNC on the constant value INPUT. RES_TYPE is the type of the parameter
1471 to which the result is passed. Return NULL_TREE if that cannot be
1472 determined or be considered an interprocedural invariant. */
1473
1474 static tree
1475 ipa_get_jf_pass_through_result (struct ipa_jump_func *jfunc, tree input,
1476 tree res_type)
1477 {
1478 return ipa_get_jf_arith_result (ipa_get_jf_pass_through_operation (jfunc),
1479 input,
1480 ipa_get_jf_pass_through_operand (jfunc),
1481 res_type);
1482 }
1483
1484 /* Return the result of an ancestor jump function JFUNC on the constant value
1485 INPUT. Return NULL_TREE if that cannot be determined. */
1486
1487 static tree
1488 ipa_get_jf_ancestor_result (struct ipa_jump_func *jfunc, tree input)
1489 {
1490 gcc_checking_assert (TREE_CODE (input) != TREE_BINFO);
1491 if (TREE_CODE (input) == ADDR_EXPR)
1492 {
1493 gcc_checking_assert (is_gimple_ip_invariant_address (input));
1494 poly_int64 off = ipa_get_jf_ancestor_offset (jfunc);
1495 if (known_eq (off, 0))
1496 return input;
1497 poly_int64 byte_offset = exact_div (off, BITS_PER_UNIT);
1498 return build1 (ADDR_EXPR, TREE_TYPE (input),
1499 fold_build2 (MEM_REF, TREE_TYPE (TREE_TYPE (input)), input,
1500 build_int_cst (ptr_type_node, byte_offset)));
1501 }
1502 else if (ipa_get_jf_ancestor_keep_null (jfunc)
1503 && zerop (input))
1504 return input;
1505 else
1506 return NULL_TREE;
1507 }
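
/* As an illustrative example, an ancestor jump function with a 32 bit offset
   applied to the invariant address &obj evaluates to the equivalent of
   &MEM[&obj + 4B], i.e. the address 4 bytes into the object.  */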
1508
1509 /* Determine whether JFUNC evaluates to a single known constant value and if
1510 so, return it. Otherwise return NULL. INFO describes the caller node or
1511 the one it is inlined to, so that pass-through jump functions can be
1512 evaluated. PARM_TYPE is the type of the parameter to which the result is
1513 passed. */
1514
1515 tree
1516 ipa_value_from_jfunc (class ipa_node_params *info, struct ipa_jump_func *jfunc,
1517 tree parm_type)
1518 {
1519 if (jfunc->type == IPA_JF_CONST)
1520 return ipa_get_jf_constant (jfunc);
1521 else if (jfunc->type == IPA_JF_PASS_THROUGH
1522 || jfunc->type == IPA_JF_ANCESTOR)
1523 {
1524 tree input;
1525 int idx;
1526
1527 if (jfunc->type == IPA_JF_PASS_THROUGH)
1528 idx = ipa_get_jf_pass_through_formal_id (jfunc);
1529 else
1530 idx = ipa_get_jf_ancestor_formal_id (jfunc);
1531
1532 if (info->ipcp_orig_node)
1533 input = info->known_csts[idx];
1534 else
1535 {
1536 ipcp_lattice<tree> *lat;
1537
1538 if (!info->lattices
1539 || idx >= ipa_get_param_count (info))
1540 return NULL_TREE;
1541 lat = ipa_get_scalar_lat (info, idx);
1542 if (!lat->is_single_const ())
1543 return NULL_TREE;
1544 input = lat->values->value;
1545 }
1546
1547 if (!input)
1548 return NULL_TREE;
1549
1550 if (jfunc->type == IPA_JF_PASS_THROUGH)
1551 return ipa_get_jf_pass_through_result (jfunc, input, parm_type);
1552 else
1553 return ipa_get_jf_ancestor_result (jfunc, input);
1554 }
1555 else
1556 return NULL_TREE;
1557 }
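
/* As a hypothetical example, for a call foo (x + 4) in which the lattice of
   the caller's parameter X holds the single constant 16, the pass-through
   jump function is evaluated by the function above to 20.  In an already
   materialized clone, the same input would instead come from
   info->known_csts.  */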
1558
1559 /* Determine whether JFUNC evaluates to a single known polymorphic context,
1560 given that INFO describes the caller node or the one it is inlined to, CS is
1561 the call graph edge corresponding to JFUNC and CSIDX is the index of the
1562 described parameter. */
1563
1564 ipa_polymorphic_call_context
1565 ipa_context_from_jfunc (ipa_node_params *info, cgraph_edge *cs, int csidx,
1566 ipa_jump_func *jfunc)
1567 {
1568 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
1569 ipa_polymorphic_call_context ctx;
1570 ipa_polymorphic_call_context *edge_ctx
1571 = cs ? ipa_get_ith_polymorhic_call_context (args, csidx) : NULL;
1572
1573 if (edge_ctx && !edge_ctx->useless_p ())
1574 ctx = *edge_ctx;
1575
1576 if (jfunc->type == IPA_JF_PASS_THROUGH
1577 || jfunc->type == IPA_JF_ANCESTOR)
1578 {
1579 ipa_polymorphic_call_context srcctx;
1580 int srcidx;
1581 bool type_preserved = true;
1582 if (jfunc->type == IPA_JF_PASS_THROUGH)
1583 {
1584 if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
1585 return ctx;
1586 type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
1587 srcidx = ipa_get_jf_pass_through_formal_id (jfunc);
1588 }
1589 else
1590 {
1591 type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
1592 srcidx = ipa_get_jf_ancestor_formal_id (jfunc);
1593 }
1594 if (info->ipcp_orig_node)
1595 {
1596 if (info->known_contexts.exists ())
1597 srcctx = info->known_contexts[srcidx];
1598 }
1599 else
1600 {
1601 if (!info->lattices
1602 || srcidx >= ipa_get_param_count (info))
1603 return ctx;
1604 ipcp_lattice<ipa_polymorphic_call_context> *lat;
1605 lat = ipa_get_poly_ctx_lat (info, srcidx);
1606 if (!lat->is_single_const ())
1607 return ctx;
1608 srcctx = lat->values->value;
1609 }
1610 if (srcctx.useless_p ())
1611 return ctx;
1612 if (jfunc->type == IPA_JF_ANCESTOR)
1613 srcctx.offset_by (ipa_get_jf_ancestor_offset (jfunc));
1614 if (!type_preserved)
1615 srcctx.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
1616 srcctx.combine_with (ctx);
1617 return srcctx;
1618 }
1619
1620 return ctx;
1621 }
1622
1623 /* Emulate effects of unary OPERATION and/or conversion from SRC_TYPE to
1624 DST_TYPE on value range in SRC_VR and store it to DST_VR. Return true if
1625 the result is a range or an anti-range. */
1626
1627 static bool
1628 ipa_vr_operation_and_type_effects (value_range *dst_vr,
1629 value_range *src_vr,
1630 enum tree_code operation,
1631 tree dst_type, tree src_type)
1632 {
1633 range_fold_unary_expr (dst_vr, operation, dst_type, src_vr, src_type);
1634 if (dst_vr->varying_p () || dst_vr->undefined_p ())
1635 return false;
1636 return true;
1637 }
1638
1639 /* Determine value_range of JFUNC given that INFO describes the caller node or
1640 the one it is inlined to, CS is the call graph edge corresponding to JFUNC
1641 and PARM_TYPE is the type of the parameter. */
1642
1643 value_range
1644 ipa_value_range_from_jfunc (ipa_node_params *info, cgraph_edge *cs,
1645 ipa_jump_func *jfunc, tree parm_type)
1646 {
1647 value_range vr;
1648 if (jfunc->m_vr)
1649 ipa_vr_operation_and_type_effects (&vr,
1650 jfunc->m_vr,
1651 NOP_EXPR, parm_type,
1652 jfunc->m_vr->type ());
1653 if (vr.singleton_p ())
1654 return vr;
1655 if (jfunc->type == IPA_JF_PASS_THROUGH)
1656 {
1657 int idx;
1658 ipcp_transformation *sum
1659 = ipcp_get_transformation_summary (cs->caller->inlined_to
1660 ? cs->caller->inlined_to
1661 : cs->caller);
1662 if (!sum || !sum->m_vr)
1663 return vr;
1664
1665 idx = ipa_get_jf_pass_through_formal_id (jfunc);
1666
1667 if (!(*sum->m_vr)[idx].known)
1668 return vr;
1669 tree vr_type = ipa_get_type (info, idx);
1670 value_range srcvr (wide_int_to_tree (vr_type, (*sum->m_vr)[idx].min),
1671 wide_int_to_tree (vr_type, (*sum->m_vr)[idx].max),
1672 (*sum->m_vr)[idx].type);
1673
1674 enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
1675
1676 if (TREE_CODE_CLASS (operation) == tcc_unary)
1677 {
1678 value_range res;
1679
1680 if (ipa_vr_operation_and_type_effects (&res,
1681 &srcvr,
1682 operation, parm_type,
1683 vr_type))
1684 vr.intersect (res);
1685 }
1686 else
1687 {
1688 value_range op_res, res;
1689 tree op = ipa_get_jf_pass_through_operand (jfunc);
1690 value_range op_vr (op, op);
1691
1692 range_fold_binary_expr (&op_res, operation, vr_type, &srcvr, &op_vr);
1693 if (ipa_vr_operation_and_type_effects (&res,
1694 &op_res,
1695 NOP_EXPR, parm_type,
1696 vr_type))
1697 vr.intersect (res);
1698 }
1699 }
1700 return vr;
1701 }
1702
1703 /* See if NODE is a clone with a known aggregate value at a given OFFSET of a
1704 parameter with the given INDEX. */
1705
1706 static tree
1707 get_clone_agg_value (struct cgraph_node *node, HOST_WIDE_INT offset,
1708 int index)
1709 {
1710 struct ipa_agg_replacement_value *aggval;
1711
1712 aggval = ipa_get_agg_replacements_for_node (node);
1713 while (aggval)
1714 {
1715 if (aggval->offset == offset
1716 && aggval->index == index)
1717 return aggval->value;
1718 aggval = aggval->next;
1719 }
1720 return NULL_TREE;
1721 }
1722
1723 /* Determine whether ITEM, a jump function for an aggregate part, evaluates
1724 to a single known constant value and if so, return it. Otherwise return
1725 NULL. NODE and INFO describe the caller node or the one it is inlined to,
1726 and its related info. */
1727
1728 static tree
1729 ipa_agg_value_from_node (class ipa_node_params *info,
1730 struct cgraph_node *node,
1731 struct ipa_agg_jf_item *item)
1732 {
1733 tree value = NULL_TREE;
1734 int src_idx;
1735
1736 if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
1737 return NULL_TREE;
1738
1739 if (item->jftype == IPA_JF_CONST)
1740 return item->value.constant;
1741
1742 gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
1743 || item->jftype == IPA_JF_LOAD_AGG);
1744
1745 src_idx = item->value.pass_through.formal_id;
1746
1747 if (info->ipcp_orig_node)
1748 {
1749 if (item->jftype == IPA_JF_PASS_THROUGH)
1750 value = info->known_csts[src_idx];
1751 else
1752 value = get_clone_agg_value (node, item->value.load_agg.offset,
1753 src_idx);
1754 }
1755 else if (info->lattices)
1756 {
1757 class ipcp_param_lattices *src_plats
1758 = ipa_get_parm_lattices (info, src_idx);
1759
1760 if (item->jftype == IPA_JF_PASS_THROUGH)
1761 {
1762 struct ipcp_lattice<tree> *lat = &src_plats->itself;
1763
1764 if (!lat->is_single_const ())
1765 return NULL_TREE;
1766
1767 value = lat->values->value;
1768 }
1769 else if (src_plats->aggs
1770 && !src_plats->aggs_bottom
1771 && !src_plats->aggs_contain_variable
1772 && src_plats->aggs_by_ref == item->value.load_agg.by_ref)
1773 {
1774 struct ipcp_agg_lattice *aglat;
1775
1776 for (aglat = src_plats->aggs; aglat; aglat = aglat->next)
1777 {
1778 if (aglat->offset > item->value.load_agg.offset)
1779 break;
1780
1781 if (aglat->offset == item->value.load_agg.offset)
1782 {
1783 if (aglat->is_single_const ())
1784 value = aglat->values->value;
1785 break;
1786 }
1787 }
1788 }
1789 }
1790
1791 if (!value)
1792 return NULL_TREE;
1793
1794 if (item->jftype == IPA_JF_LOAD_AGG)
1795 {
1796 tree load_type = item->value.load_agg.type;
1797 tree value_type = TREE_TYPE (value);
1798
1799 /* Ensure the value type is compatible with the load type. */
1800 if (!useless_type_conversion_p (load_type, value_type))
1801 return NULL_TREE;
1802 }
1803
1804 return ipa_get_jf_arith_result (item->value.pass_through.operation,
1805 value,
1806 item->value.pass_through.operand,
1807 item->type);
1808 }
1809
1810 /* Determine whether AGG_JFUNC evaluates to a set of known constant values
1811 for an aggregate and if so, return it. Otherwise return an empty set. NODE
1812 and INFO describe the caller node or the one it is inlined to, and its
1813 related info. */
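/* As a hypothetical illustration: for a call g (&s) where the caller is known
to store the constant 1 into s at bit offset 0 and the constant 2 at bit
offset 32, the set returned below would contain the (offset, value) pairs
(0, 1) and (32, 2), with by_ref copied from the jump function. */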
1814
1815 struct ipa_agg_value_set
1816 ipa_agg_value_set_from_jfunc (class ipa_node_params *info, cgraph_node *node,
1817 struct ipa_agg_jump_function *agg_jfunc)
1818 {
1819 struct ipa_agg_value_set agg;
1820 struct ipa_agg_jf_item *item;
1821 int i;
1822
1823 agg.items = vNULL;
1824 agg.by_ref = agg_jfunc->by_ref;
1825
1826 FOR_EACH_VEC_SAFE_ELT (agg_jfunc->items, i, item)
1827 {
1828 tree value = ipa_agg_value_from_node (info, node, item);
1829
1830 if (value)
1831 {
1832 struct ipa_agg_value value_item;
1833
1834 value_item.offset = item->offset;
1835 value_item.value = value;
1836
1837 agg.items.safe_push (value_item);
1838 }
1839 }
1840 return agg;
1841 }
1842
1843 /* If checking is enabled, verify that no lattice is in the TOP state, i.e. not
1844 bottom, not containing a variable component and without any known value at
1845 the same time. */
1846
1847 DEBUG_FUNCTION void
1848 ipcp_verify_propagated_values (void)
1849 {
1850 struct cgraph_node *node;
1851
1852 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
1853 {
1854 ipa_node_params *info = ipa_node_params_sum->get (node);
1855 if (!opt_for_fn (node->decl, flag_ipa_cp)
1856 || !opt_for_fn (node->decl, optimize))
1857 continue;
1858 int i, count = ipa_get_param_count (info);
1859
1860 for (i = 0; i < count; i++)
1861 {
1862 ipcp_lattice<tree> *lat = ipa_get_scalar_lat (info, i);
1863
1864 if (!lat->bottom
1865 && !lat->contains_variable
1866 && lat->values_count == 0)
1867 {
1868 if (dump_file)
1869 {
1870 symtab->dump (dump_file);
1871 fprintf (dump_file, "\nIPA lattices after constant "
1872 "propagation, before gcc_unreachable:\n");
1873 print_all_lattices (dump_file, true, false);
1874 }
1875
1876 gcc_unreachable ();
1877 }
1878 }
1879 }
1880 }
1881
1882 /* Return true iff X and Y should be considered equal contexts by IPA-CP. */
1883
1884 static bool
1885 values_equal_for_ipcp_p (ipa_polymorphic_call_context x,
1886 ipa_polymorphic_call_context y)
1887 {
1888 return x.equal_to (y);
1889 }
1890
1891
1892 /* Add a new value source to the value represented by THIS, marking that a
1893 value comes from edge CS and (if the underlying jump function is a
1894 pass-through or an ancestor one) from a caller value SRC_VAL of a caller
1895 parameter described by SRC_IDX. OFFSET is the offset within an aggregate,
1896 or negative if the source was the scalar value of the parameter itself. */
1897
1898 template <typename valtype>
1899 void
1900 ipcp_value<valtype>::add_source (cgraph_edge *cs, ipcp_value *src_val,
1901 int src_idx, HOST_WIDE_INT offset)
1902 {
1903 ipcp_value_source<valtype> *src;
1904
1905 src = new (ipcp_sources_pool.allocate ()) ipcp_value_source<valtype>;
1906 src->offset = offset;
1907 src->cs = cs;
1908 src->val = src_val;
1909 src->index = src_idx;
1910
1911 src->next = sources;
1912 sources = src;
1913 }
1914
1915 /* Allocate a new ipcp_value holding a tree constant, initialize its value to
1916 CST and clear all other fields. */
1917
1918 static ipcp_value<tree> *
1919 allocate_and_init_ipcp_value (tree cst, unsigned same_lat_gen_level)
1920 {
1921 ipcp_value<tree> *val;
1922
1923 val = new (ipcp_cst_values_pool.allocate ()) ipcp_value<tree>();
1924 val->value = cst;
1925 val->self_recursion_generated_level = same_lat_gen_level;
1926 return val;
1927 }
1928
1929 /* Allocate a new ipcp_value holding a polymorphic context, initialize its
1930 value to CTX and clear all other fields. */
1931
1932 static ipcp_value<ipa_polymorphic_call_context> *
1933 allocate_and_init_ipcp_value (ipa_polymorphic_call_context ctx,
1934 unsigned same_lat_gen_level)
1935 {
1936 ipcp_value<ipa_polymorphic_call_context> *val;
1937
1938 val = new (ipcp_poly_ctx_values_pool.allocate ())
1939 ipcp_value<ipa_polymorphic_call_context>();
1940 val->value = ctx;
1941 val->self_recursion_generated_level = same_lat_gen_level;
1942 return val;
1943 }
1944
1945 /* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it. CS,
1946 SRC_VAL, SRC_IDX and OFFSET are meant for add_source and have the same
1947 meaning. OFFSET -1 means the source is scalar and not a part of an
1948 aggregate. If non-NULL, VAL_P records address of existing or newly added
1949 ipcp_value.
1950
1951 If the value is generated for a self-recursive call as a result of an
1952 arithmetic pass-through jump-function acting on a value in the same lattice,
1953 SAME_LAT_GEN_LEVEL must be the length of such chain, otherwise it must be
1954 zero. If it is non-zero, PARAM_IPA_CP_VALUE_LIST_SIZE limit is ignored. */
1955
1956 template <typename valtype>
1957 bool
1958 ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
1959 ipcp_value<valtype> *src_val,
1960 int src_idx, HOST_WIDE_INT offset,
1961 ipcp_value<valtype> **val_p,
1962 unsigned same_lat_gen_level)
1963 {
1964 ipcp_value<valtype> *val, *last_val = NULL;
1965
1966 if (val_p)
1967 *val_p = NULL;
1968
1969 if (bottom)
1970 return false;
1971
1972 for (val = values; val; last_val = val, val = val->next)
1973 if (values_equal_for_ipcp_p (val->value, newval))
1974 {
1975 if (val_p)
1976 *val_p = val;
1977
1978 if (val->self_recursion_generated_level < same_lat_gen_level)
1979 val->self_recursion_generated_level = same_lat_gen_level;
1980
1981 if (ipa_edge_within_scc (cs))
1982 {
1983 ipcp_value_source<valtype> *s;
1984 for (s = val->sources; s; s = s->next)
1985 if (s->cs == cs && s->val == src_val)
1986 break;
1987 if (s)
1988 return false;
1989 }
1990
1991 val->add_source (cs, src_val, src_idx, offset);
1992 return false;
1993 }
1994
1995 if (!same_lat_gen_level && values_count == opt_for_fn (cs->caller->decl,
1996 param_ipa_cp_value_list_size))
1997 {
1998 /* We can only free sources, not the values themselves, because sources
1999 of other values in this SCC might point to them. */
2000 for (val = values; val; val = val->next)
2001 {
2002 while (val->sources)
2003 {
2004 ipcp_value_source<valtype> *src = val->sources;
2005 val->sources = src->next;
2006 ipcp_sources_pool.remove ((ipcp_value_source<tree>*)src);
2007 }
2008 }
2009 values = NULL;
2010 return set_to_bottom ();
2011 }
2012
2013 values_count++;
2014 val = allocate_and_init_ipcp_value (newval, same_lat_gen_level);
2015 val->add_source (cs, src_val, src_idx, offset);
2016 val->next = NULL;
2017
2018 /* Add the new value to the end of the value list, which can reduce the
2019 number of propagation-stage iterations for recursive functions. */
2020 if (last_val)
2021 last_val->next = val;
2022 else
2023 values = val;
2024
2025 if (val_p)
2026 *val_p = val;
2027
2028 return true;
2029 }
2030
2031 /* A helper function that returns the result of the operation specified by
2032 OPCODE on the value of SRC_VAL. If non-NULL, OPND1_TYPE is the expected
2033 type for the value of SRC_VAL. If the operation is binary, OPND2 is a
2034 constant value acting as its second operand. If non-NULL, RES_TYPE is the
2035 expected type of the result. */
2036
2037 static tree
2038 get_val_across_arith_op (enum tree_code opcode,
2039 tree opnd1_type,
2040 tree opnd2,
2041 ipcp_value<tree> *src_val,
2042 tree res_type)
2043 {
2044 tree opnd1 = src_val->value;
2045
2046 /* Skip source values that are incompatible with the specified type. */
2047 if (opnd1_type
2048 && !useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1)))
2049 return NULL_TREE;
2050
2051 return ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type);
2052 }
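/* For instance (hypothetical inputs): with OPCODE PLUS_EXPR, SRC_VAL holding
the integer constant 4, OPND2 of 1 and RES_TYPE of int, the helper above
folds 4 + 1 and returns the integer constant 5. */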
2053
2054 /* Propagate values through an arithmetic transformation described by a jump
2055 function associated with edge CS, taking values from SRC_LAT and putting
2056 them into DEST_LAT. OPND1_TYPE is the expected type for the values in
2057 SRC_LAT. OPND2 is a constant value if the transformation is a binary
2058 operation. SRC_OFFSET specifies the offset in an aggregate if SRC_LAT
2059 describes a lattice of a part of the aggregate. SRC_IDX is the index of the
2060 source parameter. RES_TYPE is the type of the result being propagated.
2061 Return true if DEST_LAT changed. */
2062
2063 static bool
2064 propagate_vals_across_arith_jfunc (cgraph_edge *cs,
2065 enum tree_code opcode,
2066 tree opnd1_type,
2067 tree opnd2,
2068 ipcp_lattice<tree> *src_lat,
2069 ipcp_lattice<tree> *dest_lat,
2070 HOST_WIDE_INT src_offset,
2071 int src_idx,
2072 tree res_type)
2073 {
2074 ipcp_value<tree> *src_val;
2075 bool ret = false;
2076
2077 /* Due to circular dependencies, propagating within an SCC through arithmetic
2078 transformation would create an infinite number of values. But for a
2079 self-feeding recursive function, we can allow propagation in a limited
2080 count, which can enable a simple kind of recursive function versioning.
2081 For other scenarios, we just make the lattices bottom. */
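/* As a hypothetical sketch of the self-feeding case:

void f (int i) { use (i); if (i < 8) f (i + 1); }

called as f (0): the i + 1 pass-through feeds the lattice of I back into
itself, so seeding from the constant 0 can generate the values 1, 2, 3, ...
up to param_ipa_cp_max_recursive_depth of them, instead of immediately
dropping the lattice to bottom. */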
2082 if (opcode != NOP_EXPR && ipa_edge_within_scc (cs))
2083 {
2084 int i;
2085
2086 int max_recursive_depth = opt_for_fn (cs->caller->decl,
2087 param_ipa_cp_max_recursive_depth);
2088 if (src_lat != dest_lat || max_recursive_depth < 1)
2089 return dest_lat->set_contains_variable ();
2090
2091 /* No benefit if recursive execution has low probability. */
2092 if (cs->sreal_frequency () * 100
2093 <= ((sreal) 1) * opt_for_fn (cs->caller->decl,
2094 param_ipa_cp_min_recursive_probability))
2095 return dest_lat->set_contains_variable ();
2096
2097 auto_vec<ipcp_value<tree> *, 8> val_seeds;
2098
2099 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2100 {
2101 /* We currently do not use self-recursively generated values as propagation
2102 sources, which is absolutely conservative but avoids an explosion of a
2103 lattice's value space, especially when one recursive function calls
2104 another recursive one. */
2105 if (src_val->self_recursion_generated_p ())
2106 {
2107 ipcp_value_source<tree> *s;
2108
2109 /* If the lattice has already been propagated for the call site,
2110 no need to do that again. */
2111 for (s = src_val->sources; s; s = s->next)
2112 if (s->cs == cs)
2113 return dest_lat->set_contains_variable ();
2114 }
2115 else
2116 val_seeds.safe_push (src_val);
2117 }
2118
2119 gcc_assert ((int) val_seeds.length () <= param_ipa_cp_value_list_size);
2120
2121 /* Recursively generate lattice values with a limited count. */
2122 FOR_EACH_VEC_ELT (val_seeds, i, src_val)
2123 {
2124 for (int j = 1; j < max_recursive_depth; j++)
2125 {
2126 tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2127 src_val, res_type);
2128 if (!cstval
2129 || !ipacp_value_safe_for_type (res_type, cstval))
2130 break;
2131
2132 ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
2133 src_offset, &src_val, j);
2134 gcc_checking_assert (src_val);
2135 }
2136 }
2137 ret |= dest_lat->set_contains_variable ();
2138 }
2139 else
2140 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2141 {
2142 /* We do not use self-recursively generated values as propagation sources
2143 here either, otherwise it would be easy to overflow the value space of a
2144 normal lattice. */
2145 if (src_val->self_recursion_generated_p ())
2146 {
2147 ret |= dest_lat->set_contains_variable ();
2148 continue;
2149 }
2150
2151 tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2152 src_val, res_type);
2153 if (cstval
2154 && ipacp_value_safe_for_type (res_type, cstval))
2155 ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
2156 src_offset);
2157 else
2158 ret |= dest_lat->set_contains_variable ();
2159 }
2160
2161 return ret;
2162 }
2163
2164 /* Propagate values through a pass-through jump function JFUNC associated with
2165 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2166 is the index of the source parameter. PARM_TYPE is the type of the
2167 parameter to which the result is passed. */
2168
2169 static bool
2170 propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc,
2171 ipcp_lattice<tree> *src_lat,
2172 ipcp_lattice<tree> *dest_lat, int src_idx,
2173 tree parm_type)
2174 {
2175 return propagate_vals_across_arith_jfunc (cs,
2176 ipa_get_jf_pass_through_operation (jfunc),
2177 NULL_TREE,
2178 ipa_get_jf_pass_through_operand (jfunc),
2179 src_lat, dest_lat, -1, src_idx, parm_type);
2180 }
2181
2182 /* Propagate values through an ancestor jump function JFUNC associated with
2183 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2184 is the index of the source parameter. */
2185
2186 static bool
2187 propagate_vals_across_ancestor (struct cgraph_edge *cs,
2188 struct ipa_jump_func *jfunc,
2189 ipcp_lattice<tree> *src_lat,
2190 ipcp_lattice<tree> *dest_lat, int src_idx,
2191 tree param_type)
2192 {
2193 ipcp_value<tree> *src_val;
2194 bool ret = false;
2195
2196 if (ipa_edge_within_scc (cs))
2197 return dest_lat->set_contains_variable ();
2198
2199 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2200 {
2201 tree t = ipa_get_jf_ancestor_result (jfunc, src_val->value);
2202
2203 if (t && ipacp_value_safe_for_type (param_type, t))
2204 ret |= dest_lat->add_value (t, cs, src_val, src_idx);
2205 else
2206 ret |= dest_lat->set_contains_variable ();
2207 }
2208
2209 return ret;
2210 }
2211
2212 /* Propagate scalar values across jump function JFUNC that is associated with
2213 edge CS and put the values into DEST_LAT. PARAM_TYPE is the type of the
2214 parameter to which the result is passed. */
2215
2216 static bool
2217 propagate_scalar_across_jump_function (struct cgraph_edge *cs,
2218 struct ipa_jump_func *jfunc,
2219 ipcp_lattice<tree> *dest_lat,
2220 tree param_type)
2221 {
2222 if (dest_lat->bottom)
2223 return false;
2224
2225 if (jfunc->type == IPA_JF_CONST)
2226 {
2227 tree val = ipa_get_jf_constant (jfunc);
2228 if (ipacp_value_safe_for_type (param_type, val))
2229 return dest_lat->add_value (val, cs, NULL, 0);
2230 else
2231 return dest_lat->set_contains_variable ();
2232 }
2233 else if (jfunc->type == IPA_JF_PASS_THROUGH
2234 || jfunc->type == IPA_JF_ANCESTOR)
2235 {
2236 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2237 ipcp_lattice<tree> *src_lat;
2238 int src_idx;
2239 bool ret;
2240
2241 if (jfunc->type == IPA_JF_PASS_THROUGH)
2242 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2243 else
2244 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2245
2246 src_lat = ipa_get_scalar_lat (caller_info, src_idx);
2247 if (src_lat->bottom)
2248 return dest_lat->set_contains_variable ();
2249
2250 /* If we would need to clone the caller and cannot, do not propagate. */
2251 if (!ipcp_versionable_function_p (cs->caller)
2252 && (src_lat->contains_variable
2253 || (src_lat->values_count > 1)))
2254 return dest_lat->set_contains_variable ();
2255
2256 if (jfunc->type == IPA_JF_PASS_THROUGH)
2257 ret = propagate_vals_across_pass_through (cs, jfunc, src_lat,
2258 dest_lat, src_idx,
2259 param_type);
2260 else
2261 ret = propagate_vals_across_ancestor (cs, jfunc, src_lat, dest_lat,
2262 src_idx, param_type);
2263
2264 if (src_lat->contains_variable)
2265 ret |= dest_lat->set_contains_variable ();
2266
2267 return ret;
2268 }
2269
2270 /* TODO: We currently do not handle member method pointers in IPA-CP (we only
2271 use them for indirect inlining); we should propagate them too. */
2272 return dest_lat->set_contains_variable ();
2273 }
2274
2275 /* Propagate polymorphic call contexts across jump function JFUNC that is
2276 associated with edge CS and describes argument IDX, putting them into DEST_LAT. */
2277
2278 static bool
2279 propagate_context_across_jump_function (cgraph_edge *cs,
2280 ipa_jump_func *jfunc, int idx,
2281 ipcp_lattice<ipa_polymorphic_call_context> *dest_lat)
2282 {
2283 if (dest_lat->bottom)
2284 return false;
2285 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
2286 bool ret = false;
2287 bool added_sth = false;
2288 bool type_preserved = true;
2289
2290 ipa_polymorphic_call_context edge_ctx, *edge_ctx_ptr
2291 = ipa_get_ith_polymorhic_call_context (args, idx);
2292
2293 if (edge_ctx_ptr)
2294 edge_ctx = *edge_ctx_ptr;
2295
2296 if (jfunc->type == IPA_JF_PASS_THROUGH
2297 || jfunc->type == IPA_JF_ANCESTOR)
2298 {
2299 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2300 int src_idx;
2301 ipcp_lattice<ipa_polymorphic_call_context> *src_lat;
2302
2303 /* TODO: Once we figure out how to propagate speculations, it will
2304 probably be a good idea to switch to speculation if type_preserved is
2305 not set instead of punting. */
2306 if (jfunc->type == IPA_JF_PASS_THROUGH)
2307 {
2308 if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
2309 goto prop_fail;
2310 type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
2311 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2312 }
2313 else
2314 {
2315 type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
2316 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2317 }
2318
2319 src_lat = ipa_get_poly_ctx_lat (caller_info, src_idx);
2320 /* If we would need to clone the caller and cannot, do not propagate. */
2321 if (!ipcp_versionable_function_p (cs->caller)
2322 && (src_lat->contains_variable
2323 || (src_lat->values_count > 1)))
2324 goto prop_fail;
2325
2326 ipcp_value<ipa_polymorphic_call_context> *src_val;
2327 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2328 {
2329 ipa_polymorphic_call_context cur = src_val->value;
2330
2331 if (!type_preserved)
2332 cur.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
2333 if (jfunc->type == IPA_JF_ANCESTOR)
2334 cur.offset_by (ipa_get_jf_ancestor_offset (jfunc));
2335 /* TODO: In cases we know how the context is going to be used,
2336 we can improve the result by passing proper OTR_TYPE. */
2337 cur.combine_with (edge_ctx);
2338 if (!cur.useless_p ())
2339 {
2340 if (src_lat->contains_variable
2341 && !edge_ctx.equal_to (cur))
2342 ret |= dest_lat->set_contains_variable ();
2343 ret |= dest_lat->add_value (cur, cs, src_val, src_idx);
2344 added_sth = true;
2345 }
2346 }
2347 }
2348
2349 prop_fail:
2350 if (!added_sth)
2351 {
2352 if (!edge_ctx.useless_p ())
2353 ret |= dest_lat->add_value (edge_ctx, cs);
2354 else
2355 ret |= dest_lat->set_contains_variable ();
2356 }
2357
2358 return ret;
2359 }
2360
2361 /* Propagate bits across jump function JFUNC that is associated with
2362 edge CS and update DEST_LATTICE accordingly. */
2363
2364 bool
2365 propagate_bits_across_jump_function (cgraph_edge *cs, int idx,
2366 ipa_jump_func *jfunc,
2367 ipcp_bits_lattice *dest_lattice)
2368 {
2369 if (dest_lattice->bottom_p ())
2370 return false;
2371
2372 enum availability availability;
2373 cgraph_node *callee = cs->callee->function_symbol (&availability);
2374 ipa_node_params *callee_info = ipa_node_params_sum->get (callee);
2375 tree parm_type = ipa_get_type (callee_info, idx);
2376
2377 /* For K&R C programs, ipa_get_type() could return NULL_TREE. Avoid the
2378 transform for these cases. Similarly, we can have bad type mismatches
2379 with LTO; avoid doing anything with those too. */
2380 if (!parm_type
2381 || (!INTEGRAL_TYPE_P (parm_type) && !POINTER_TYPE_P (parm_type)))
2382 {
2383 if (dump_file && (dump_flags & TDF_DETAILS))
2384 fprintf (dump_file, "Setting dest_lattice to bottom, because type of "
2385 "param %i of %s is NULL or unsuitable for bits propagation\n",
2386 idx, cs->callee->dump_name ());
2387
2388 return dest_lattice->set_to_bottom ();
2389 }
2390
2391 unsigned precision = TYPE_PRECISION (parm_type);
2392 signop sgn = TYPE_SIGN (parm_type);
2393
2394 if (jfunc->type == IPA_JF_PASS_THROUGH
2395 || jfunc->type == IPA_JF_ANCESTOR)
2396 {
2397 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2398 tree operand = NULL_TREE;
2399 enum tree_code code;
2400 unsigned src_idx;
2401 bool keep_null = false;
2402
2403 if (jfunc->type == IPA_JF_PASS_THROUGH)
2404 {
2405 code = ipa_get_jf_pass_through_operation (jfunc);
2406 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2407 if (code != NOP_EXPR)
2408 operand = ipa_get_jf_pass_through_operand (jfunc);
2409 }
2410 else
2411 {
2412 code = POINTER_PLUS_EXPR;
2413 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2414 unsigned HOST_WIDE_INT offset
2415 = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
2416 keep_null = (ipa_get_jf_ancestor_keep_null (jfunc) || !offset);
2417 operand = build_int_cstu (size_type_node, offset);
2418 }
2419
2420 class ipcp_param_lattices *src_lats
2421 = ipa_get_parm_lattices (caller_info, src_idx);
2422
2423 /* Try to propagate bits even if the source lattice is bottom, provided the
2424 jump function is known. For example, consider:
2425 int f (int x)
2426 {
2427 g (x & 0xff);
2428 }
2429 Even if the lattice for x is bottom, we can still propagate the known
2430 mask of x & 0xff, which gets computed during the ccp1 pass and is
2431 stored in the jump function during the analysis stage. */
2432
2433 if (!src_lats->bits_lattice.bottom_p ())
2434 {
2435 bool drop_all_ones
2436 = keep_null && !src_lats->bits_lattice.known_nonzero_p ();
2437
2438 return dest_lattice->meet_with (src_lats->bits_lattice, precision,
2439 sgn, code, operand, drop_all_ones);
2440 }
2441 }
2442
2443 if (jfunc->bits)
2444 return dest_lattice->meet_with (jfunc->bits->value, jfunc->bits->mask,
2445 precision);
2446 else
2447 return dest_lattice->set_to_bottom ();
2448 }
2449
2450 /* Propagate the value range across jump function JFUNC that is associated
2451 with edge CS, whose callee parameter has type PARAM_TYPE, and update
2452 DEST_PLATS accordingly. */
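/* For example (hypothetical ranges): if the caller's parameter is known to
lie in [0, 10] and the jump function is a pass-through computing x + 1, the
callee's parameter range is folded to [1, 11] before being met with the
destination lattice below. */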
2453
2454 static bool
2455 propagate_vr_across_jump_function (cgraph_edge *cs, ipa_jump_func *jfunc,
2456 class ipcp_param_lattices *dest_plats,
2457 tree param_type)
2458 {
2459 ipcp_vr_lattice *dest_lat = &dest_plats->m_value_range;
2460
2461 if (dest_lat->bottom_p ())
2462 return false;
2463
2464 if (!param_type
2465 || (!INTEGRAL_TYPE_P (param_type)
2466 && !POINTER_TYPE_P (param_type)))
2467 return dest_lat->set_to_bottom ();
2468
2469 if (jfunc->type == IPA_JF_PASS_THROUGH)
2470 {
2471 enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
2472 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2473 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2474 class ipcp_param_lattices *src_lats
2475 = ipa_get_parm_lattices (caller_info, src_idx);
2476 tree operand_type = ipa_get_type (caller_info, src_idx);
2477
2478 if (src_lats->m_value_range.bottom_p ())
2479 return dest_lat->set_to_bottom ();
2480
2481 value_range vr;
2482 if (TREE_CODE_CLASS (operation) == tcc_unary)
2483 ipa_vr_operation_and_type_effects (&vr,
2484 &src_lats->m_value_range.m_vr,
2485 operation, param_type,
2486 operand_type);
2487 /* A crude way to prevent an unbounded number of value range updates
2488 in SCC components. We should allow a limited number of updates within
2489 an SCC, too. */
2490 else if (!ipa_edge_within_scc (cs))
2491 {
2492 tree op = ipa_get_jf_pass_through_operand (jfunc);
2493 value_range op_vr (op, op);
2494 value_range op_res, res;
2495
2496 range_fold_binary_expr (&op_res, operation, operand_type,
2497 &src_lats->m_value_range.m_vr, &op_vr);
2498 ipa_vr_operation_and_type_effects (&vr,
2499 &op_res,
2500 NOP_EXPR, param_type,
2501 operand_type);
2502 }
2503 if (!vr.undefined_p () && !vr.varying_p ())
2504 {
2505 if (jfunc->m_vr)
2506 {
2507 value_range jvr;
2508 if (ipa_vr_operation_and_type_effects (&jvr, jfunc->m_vr,
2509 NOP_EXPR,
2510 param_type,
2511 jfunc->m_vr->type ()))
2512 vr.intersect (jvr);
2513 }
2514 return dest_lat->meet_with (&vr);
2515 }
2516 }
2517 else if (jfunc->type == IPA_JF_CONST)
2518 {
2519 tree val = ipa_get_jf_constant (jfunc);
2520 if (TREE_CODE (val) == INTEGER_CST)
2521 {
2522 val = fold_convert (param_type, val);
2523 if (TREE_OVERFLOW_P (val))
2524 val = drop_tree_overflow (val);
2525
2526 value_range tmpvr (val, val);
2527 return dest_lat->meet_with (&tmpvr);
2528 }
2529 }
2530
2531 value_range vr;
2532 if (jfunc->m_vr
2533 && ipa_vr_operation_and_type_effects (&vr, jfunc->m_vr, NOP_EXPR,
2534 param_type,
2535 jfunc->m_vr->type ()))
2536 return dest_lat->meet_with (&vr);
2537 else
2538 return dest_lat->set_to_bottom ();
2539 }
2540
2541 /* If DEST_PLATS already has aggregate items, check that aggs_by_ref matches
2542 NEW_AGGS_BY_REF and if not, mark all aggs as bottoms and return true (in all
2543 other cases, return false). If there are no aggregate items, set
2544 aggs_by_ref to NEW_AGGS_BY_REF. */
2545
2546 static bool
2547 set_check_aggs_by_ref (class ipcp_param_lattices *dest_plats,
2548 bool new_aggs_by_ref)
2549 {
2550 if (dest_plats->aggs)
2551 {
2552 if (dest_plats->aggs_by_ref != new_aggs_by_ref)
2553 {
2554 set_agg_lats_to_bottom (dest_plats);
2555 return true;
2556 }
2557 }
2558 else
2559 dest_plats->aggs_by_ref = new_aggs_by_ref;
2560 return false;
2561 }
2562
2563 /* Walk aggregate lattices in DEST_PLATS from ***AGLAT on, until ***aglat is an
2564 already existing lattice for the given OFFSET and VAL_SIZE, marking all
2565 skipped lattices as containing variable and checking for overlaps. If there
2566 is no already existing lattice for the OFFSET and VAL_SIZE, create one,
2567 initialize its offset and size, set contains_variable to PRE_EXISTING, and
2568 return true, unless there are already too many, in which case return false.
2569 If there are overlaps, turn the whole DEST_PLATS to bottom and return false.
2570 If any skipped lattices were newly marked as containing variable, set
2571 *CHANGE to true. MAX_AGG_ITEMS is the maximum number of lattices. */
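/* As a hypothetical illustration: with existing lattices at bit offsets 0 and
64, both 32 bits in size, a request for OFFSET 32 and VAL_SIZE 32 links a new
lattice between them; a request for OFFSET 16 and VAL_SIZE 32 would overlap
the first lattice and turn the whole DEST_PLATS to bottom. */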
2572
2573 static bool
2574 merge_agg_lats_step (class ipcp_param_lattices *dest_plats,
2575 HOST_WIDE_INT offset, HOST_WIDE_INT val_size,
2576 struct ipcp_agg_lattice ***aglat,
2577 bool pre_existing, bool *change, int max_agg_items)
2578 {
2579 gcc_checking_assert (offset >= 0);
2580
2581 while (**aglat && (**aglat)->offset < offset)
2582 {
2583 if ((**aglat)->offset + (**aglat)->size > offset)
2584 {
2585 set_agg_lats_to_bottom (dest_plats);
2586 return false;
2587 }
2588 *change |= (**aglat)->set_contains_variable ();
2589 *aglat = &(**aglat)->next;
2590 }
2591
2592 if (**aglat && (**aglat)->offset == offset)
2593 {
2594 if ((**aglat)->size != val_size)
2595 {
2596 set_agg_lats_to_bottom (dest_plats);
2597 return false;
2598 }
2599 gcc_assert (!(**aglat)->next
2600 || (**aglat)->next->offset >= offset + val_size);
2601 return true;
2602 }
2603 else
2604 {
2605 struct ipcp_agg_lattice *new_al;
2606
2607 if (**aglat && (**aglat)->offset < offset + val_size)
2608 {
2609 set_agg_lats_to_bottom (dest_plats);
2610 return false;
2611 }
2612 if (dest_plats->aggs_count == max_agg_items)
2613 return false;
2614 dest_plats->aggs_count++;
2615 new_al = ipcp_agg_lattice_pool.allocate ();
2616 memset (new_al, 0, sizeof (*new_al));
2617
2618 new_al->offset = offset;
2619 new_al->size = val_size;
2620 new_al->contains_variable = pre_existing;
2621
2622 new_al->next = **aglat;
2623 **aglat = new_al;
2624 return true;
2625 }
2626 }
2627
2628 /* Mark AGLAT and all other aggregate lattices reachable by next pointers as
2629 containing an unknown value. */
2630
2631 static bool
2632 set_chain_of_aglats_contains_variable (struct ipcp_agg_lattice *aglat)
2633 {
2634 bool ret = false;
2635 while (aglat)
2636 {
2637 ret |= aglat->set_contains_variable ();
2638 aglat = aglat->next;
2639 }
2640 return ret;
2641 }
2642
2643 /* Merge existing aggregate lattices in SRC_PLATS to DEST_PLATS, subtracting
2644 OFFSET_DELTA. CS is the call graph edge and SRC_IDX the index of the source
2645 parameter used for lattice value sources. Return true if DEST_PLATS changed
2646 in any way. */
2647
2648 static bool
2649 merge_aggregate_lattices (struct cgraph_edge *cs,
2650 class ipcp_param_lattices *dest_plats,
2651 class ipcp_param_lattices *src_plats,
2652 int src_idx, HOST_WIDE_INT offset_delta)
2653 {
2654 bool pre_existing = dest_plats->aggs != NULL;
2655 struct ipcp_agg_lattice **dst_aglat;
2656 bool ret = false;
2657
2658 if (set_check_aggs_by_ref (dest_plats, src_plats->aggs_by_ref))
2659 return true;
2660 if (src_plats->aggs_bottom)
2661 return set_agg_lats_contain_variable (dest_plats);
2662 if (src_plats->aggs_contain_variable)
2663 ret |= set_agg_lats_contain_variable (dest_plats);
2664 dst_aglat = &dest_plats->aggs;
2665
2666 int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
2667 param_ipa_max_agg_items);
2668 for (struct ipcp_agg_lattice *src_aglat = src_plats->aggs;
2669 src_aglat;
2670 src_aglat = src_aglat->next)
2671 {
2672 HOST_WIDE_INT new_offset = src_aglat->offset - offset_delta;
2673
2674 if (new_offset < 0)
2675 continue;
2676 if (merge_agg_lats_step (dest_plats, new_offset, src_aglat->size,
2677 &dst_aglat, pre_existing, &ret, max_agg_items))
2678 {
2679 struct ipcp_agg_lattice *new_al = *dst_aglat;
2680
2681 dst_aglat = &(*dst_aglat)->next;
2682 if (src_aglat->bottom)
2683 {
2684 ret |= new_al->set_contains_variable ();
2685 continue;
2686 }
2687 if (src_aglat->contains_variable)
2688 ret |= new_al->set_contains_variable ();
2689 for (ipcp_value<tree> *val = src_aglat->values;
2690 val;
2691 val = val->next)
2692 ret |= new_al->add_value (val->value, cs, val, src_idx,
2693 src_aglat->offset);
2694 }
2695 else if (dest_plats->aggs_bottom)
2696 return true;
2697 }
2698 ret |= set_chain_of_aglats_contains_variable (*dst_aglat);
2699 return ret;
2700 }
2701
2702 /* Determine whether there is anything to propagate from SRC_PLATS through a
2703 pass-through JFUNC and if so, whether it conforms to the rules about
2704 propagating values passed by reference. */
2705
2706 static bool
2707 agg_pass_through_permissible_p (class ipcp_param_lattices *src_plats,
2708 struct ipa_jump_func *jfunc)
2709 {
2710 return src_plats->aggs
2711 && (!src_plats->aggs_by_ref
2712 || ipa_get_jf_pass_through_agg_preserved (jfunc));
2713 }
2714
2715 /* Propagate values through ITEM, jump function for a part of an aggregate,
2716 into corresponding aggregate lattice AGLAT. CS is the call graph edge
2717 associated with the jump function. Return true if AGLAT changed in any
2718 way. */
2719
2720 static bool
2721 propagate_aggregate_lattice (struct cgraph_edge *cs,
2722 struct ipa_agg_jf_item *item,
2723 struct ipcp_agg_lattice *aglat)
2724 {
2725 class ipa_node_params *caller_info;
2726 class ipcp_param_lattices *src_plats;
2727 struct ipcp_lattice<tree> *src_lat;
2728 HOST_WIDE_INT src_offset;
2729 int src_idx;
2730 tree load_type;
2731 bool ret;
2732
2733 if (item->jftype == IPA_JF_CONST)
2734 {
2735 tree value = item->value.constant;
2736
2737 gcc_checking_assert (is_gimple_ip_invariant (value));
2738 return aglat->add_value (value, cs, NULL, 0);
2739 }
2740
2741 gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
2742 || item->jftype == IPA_JF_LOAD_AGG);
2743
2744 caller_info = ipa_node_params_sum->get (cs->caller);
2745 src_idx = item->value.pass_through.formal_id;
2746 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
2747
2748 if (item->jftype == IPA_JF_PASS_THROUGH)
2749 {
2750 load_type = NULL_TREE;
2751 src_lat = &src_plats->itself;
2752 src_offset = -1;
2753 }
2754 else
2755 {
2756 HOST_WIDE_INT load_offset = item->value.load_agg.offset;
2757 struct ipcp_agg_lattice *src_aglat;
2758
2759 for (src_aglat = src_plats->aggs; src_aglat; src_aglat = src_aglat->next)
2760 if (src_aglat->offset >= load_offset)
2761 break;
2762
2763 load_type = item->value.load_agg.type;
2764 if (!src_aglat
2765 || src_aglat->offset > load_offset
2766 || src_aglat->size != tree_to_shwi (TYPE_SIZE (load_type))
2767 || src_plats->aggs_by_ref != item->value.load_agg.by_ref)
2768 return aglat->set_contains_variable ();
2769
2770 src_lat = src_aglat;
2771 src_offset = load_offset;
2772 }
2773
2774 if (src_lat->bottom
2775 || (!ipcp_versionable_function_p (cs->caller)
2776 && !src_lat->is_single_const ()))
2777 return aglat->set_contains_variable ();
2778
2779 ret = propagate_vals_across_arith_jfunc (cs,
2780 item->value.pass_through.operation,
2781 load_type,
2782 item->value.pass_through.operand,
2783 src_lat, aglat,
2784 src_offset,
2785 src_idx,
2786 item->type);
2787
2788 if (src_lat->contains_variable)
2789 ret |= aglat->set_contains_variable ();
2790
2791 return ret;
2792 }
2793
2794 /* Propagate aggregate values across jump function JFUNC that is associated
2795 with edge CS and put the values into DEST_PLATS. */
2796
2797 static bool
2798 propagate_aggs_across_jump_function (struct cgraph_edge *cs,
2799 struct ipa_jump_func *jfunc,
2800 class ipcp_param_lattices *dest_plats)
2801 {
2802 bool ret = false;
2803
2804 if (dest_plats->aggs_bottom)
2805 return false;
2806
2807 if (jfunc->type == IPA_JF_PASS_THROUGH
2808 && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
2809 {
2810 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2811 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2812 class ipcp_param_lattices *src_plats;
2813
2814 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
2815 if (agg_pass_through_permissible_p (src_plats, jfunc))
2816 {
2817 /* Currently we do not produce clobber aggregate jump
2818 functions, replace with merging when we do. */
2819 gcc_assert (!jfunc->agg.items);
2820 ret |= merge_aggregate_lattices (cs, dest_plats, src_plats,
2821 src_idx, 0);
2822 return ret;
2823 }
2824 }
2825 else if (jfunc->type == IPA_JF_ANCESTOR
2826 && ipa_get_jf_ancestor_agg_preserved (jfunc))
2827 {
2828 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2829 int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2830 class ipcp_param_lattices *src_plats;
2831
2832 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
2833 if (src_plats->aggs && src_plats->aggs_by_ref)
2834 {
2835 /* Currently we do not produce clobber aggregate jump
2836 functions, replace with merging when we do. */
2837 gcc_assert (!jfunc->agg.items);
2838 ret |= merge_aggregate_lattices (cs, dest_plats, src_plats, src_idx,
2839 ipa_get_jf_ancestor_offset (jfunc));
2840 }
2841 else if (!src_plats->aggs_by_ref)
2842 ret |= set_agg_lats_to_bottom (dest_plats);
2843 else
2844 ret |= set_agg_lats_contain_variable (dest_plats);
2845 return ret;
2846 }
2847
2848 if (jfunc->agg.items)
2849 {
2850 bool pre_existing = dest_plats->aggs != NULL;
2851 struct ipcp_agg_lattice **aglat = &dest_plats->aggs;
2852 struct ipa_agg_jf_item *item;
2853 int i;
2854
2855 if (set_check_aggs_by_ref (dest_plats, jfunc->agg.by_ref))
2856 return true;
2857
2858 int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
2859 param_ipa_max_agg_items);
2860 FOR_EACH_VEC_ELT (*jfunc->agg.items, i, item)
2861 {
2862 HOST_WIDE_INT val_size;
2863
2864 if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
2865 continue;
2866 val_size = tree_to_shwi (TYPE_SIZE (item->type));
2867
2868 if (merge_agg_lats_step (dest_plats, item->offset, val_size,
2869 &aglat, pre_existing, &ret, max_agg_items))
2870 {
2871 ret |= propagate_aggregate_lattice (cs, item, *aglat);
2872 aglat = &(*aglat)->next;
2873 }
2874 else if (dest_plats->aggs_bottom)
2875 return true;
2876 }
2877
2878 ret |= set_chain_of_aglats_contains_variable (*aglat);
2879 }
2880 else
2881 ret |= set_agg_lats_contain_variable (dest_plats);
2882
2883 return ret;
2884 }
2885
2886 /* Return true if on the way from CS->caller to the final (non-alias and
2887 non-thunk) destination, the call passes through a thunk. */
2888
2889 static bool
2890 call_passes_through_thunk (cgraph_edge *cs)
2891 {
2892 cgraph_node *alias_or_thunk = cs->callee;
2893 while (alias_or_thunk->alias)
2894 alias_or_thunk = alias_or_thunk->get_alias_target ();
2895 return alias_or_thunk->thunk;
2896 }
2897
2898 /* Propagate constants from the caller to the callee of CS. Return true if
2899 any lattice of the callee changed. */
2900
2901 static bool
2902 propagate_constants_across_call (struct cgraph_edge *cs)
2903 {
2904 class ipa_node_params *callee_info;
2905 enum availability availability;
2906 cgraph_node *callee;
2907 class ipa_edge_args *args;
2908 bool ret = false;
2909 int i, args_count, parms_count;
2910
2911 callee = cs->callee->function_symbol (&availability);
2912 if (!callee->definition)
2913 return false;
2914 gcc_checking_assert (callee->has_gimple_body_p ());
2915 callee_info = ipa_node_params_sum->get (callee);
2916 if (!callee_info)
2917 return false;
2918
2919 args = ipa_edge_args_sum->get (cs);
2920 parms_count = ipa_get_param_count (callee_info);
2921 if (parms_count == 0)
2922 return false;
2923 if (!args
2924 || !opt_for_fn (cs->caller->decl, flag_ipa_cp)
2925 || !opt_for_fn (cs->caller->decl, optimize))
2926 {
2927 for (i = 0; i < parms_count; i++)
2928 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
2929 i));
2930 return ret;
2931 }
2932 args_count = ipa_get_cs_argument_count (args);
2933
2934 /* If this call goes through a thunk we must not propagate to the first (0th)
2935 parameter. However, we might need to uncover a thunk from below a series
2936 of aliases first. */
2937 if (call_passes_through_thunk (cs))
2938 {
2939 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
2940 0));
2941 i = 1;
2942 }
2943 else
2944 i = 0;
2945
2946 for (; (i < args_count) && (i < parms_count); i++)
2947 {
2948 struct ipa_jump_func *jump_func = ipa_get_ith_jump_func (args, i);
2949 class ipcp_param_lattices *dest_plats;
2950 tree param_type = ipa_get_type (callee_info, i);
2951
2952 dest_plats = ipa_get_parm_lattices (callee_info, i);
2953 if (availability == AVAIL_INTERPOSABLE)
2954 ret |= set_all_contains_variable (dest_plats);
2955 else
2956 {
2957 ret |= propagate_scalar_across_jump_function (cs, jump_func,
2958 &dest_plats->itself,
2959 param_type);
2960 ret |= propagate_context_across_jump_function (cs, jump_func, i,
2961 &dest_plats->ctxlat);
2962 ret
2963 |= propagate_bits_across_jump_function (cs, i, jump_func,
2964 &dest_plats->bits_lattice);
2965 ret |= propagate_aggs_across_jump_function (cs, jump_func,
2966 dest_plats);
2967 if (opt_for_fn (callee->decl, flag_ipa_vrp))
2968 ret |= propagate_vr_across_jump_function (cs, jump_func,
2969 dest_plats, param_type);
2970 else
2971 ret |= dest_plats->m_value_range.set_to_bottom ();
2972 }
2973 }
2974 for (; i < parms_count; i++)
2975 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info, i));
2976
2977 return ret;
2978 }
2979
2980 /* If an indirect edge IE can be turned into a direct one based on KNOWN_CSTS,
2981 KNOWN_CONTEXTS, KNOWN_AGGS or AGG_REPS, return the destination. The latter
2982 three can be NULL. If AGG_REPS is not NULL, KNOWN_AGGS is ignored. */
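/* As a brief sketch of the non-polymorphic case handled below: if the known
constant for the parameter holding the called pointer is &f, an ADDR_EXPR of
a FUNCTION_DECL, the indirect edge is resolved directly to f. For polymorphic
calls, the code instead tries to recover the virtual table pointer and look
the target up in the vtable. */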
2983
2984 static tree
2985 ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie,
2986 const vec<tree> &known_csts,
2987 const vec<ipa_polymorphic_call_context> &known_contexts,
2988 const vec<ipa_agg_value_set> &known_aggs,
2989 struct ipa_agg_replacement_value *agg_reps,
2990 bool *speculative)
2991 {
2992 int param_index = ie->indirect_info->param_index;
2993 HOST_WIDE_INT anc_offset;
2994 tree t = NULL;
2995 tree target = NULL;
2996
2997 *speculative = false;
2998
2999 if (param_index == -1)
3000 return NULL_TREE;
3001
3002 if (!ie->indirect_info->polymorphic)
3003 {
3004 tree t = NULL;
3005
3006 if (ie->indirect_info->agg_contents)
3007 {
3008 t = NULL;
3009 if (agg_reps && ie->indirect_info->guaranteed_unmodified)
3010 {
3011 while (agg_reps)
3012 {
3013 if (agg_reps->index == param_index
3014 && agg_reps->offset == ie->indirect_info->offset
3015 && agg_reps->by_ref == ie->indirect_info->by_ref)
3016 {
3017 t = agg_reps->value;
3018 break;
3019 }
3020 agg_reps = agg_reps->next;
3021 }
3022 }
3023 if (!t)
3024 {
3025 const ipa_agg_value_set *agg;
3026 if (known_aggs.length () > (unsigned int) param_index)
3027 agg = &known_aggs[param_index];
3028 else
3029 agg = NULL;
3030 bool from_global_constant;
3031 t = ipa_find_agg_cst_for_param (agg,
3032 (unsigned) param_index
3033 < known_csts.length ()
3034 ? known_csts[param_index]
3035 : NULL,
3036 ie->indirect_info->offset,
3037 ie->indirect_info->by_ref,
3038 &from_global_constant);
3039 if (t
3040 && !from_global_constant
3041 && !ie->indirect_info->guaranteed_unmodified)
3042 t = NULL_TREE;
3043 }
3044 }
3045 else if ((unsigned) param_index < known_csts.length ())
3046 t = known_csts[param_index];
3047
3048 if (t
3049 && TREE_CODE (t) == ADDR_EXPR
3050 && TREE_CODE (TREE_OPERAND (t, 0)) == FUNCTION_DECL)
3051 return TREE_OPERAND (t, 0);
3052 else
3053 return NULL_TREE;
3054 }
3055
3056 if (!opt_for_fn (ie->caller->decl, flag_devirtualize))
3057 return NULL_TREE;
3058
3059 gcc_assert (!ie->indirect_info->agg_contents);
3060 anc_offset = ie->indirect_info->offset;
3061
3062 t = NULL;
3063
3064 /* Try to work out the value of the virtual table pointer from replacements. */
3065 if (!t && agg_reps && !ie->indirect_info->by_ref)
3066 {
3067 while (agg_reps)
3068 {
3069 if (agg_reps->index == param_index
3070 && agg_reps->offset == ie->indirect_info->offset
3071 && agg_reps->by_ref)
3072 {
3073 t = agg_reps->value;
3074 break;
3075 }
3076 agg_reps = agg_reps->next;
3077 }
3078 }
3079
3080 /* Try to work out the value of the virtual table pointer from known
3081 aggregate values. */
3082 if (!t && known_aggs.length () > (unsigned int) param_index
3083 && !ie->indirect_info->by_ref)
3084 {
3085 const ipa_agg_value_set *agg = &known_aggs[param_index];
3086 t = ipa_find_agg_cst_for_param (agg,
3087 (unsigned) param_index
3088 < known_csts.length ()
3089 ? known_csts[param_index] : NULL,
3090 ie->indirect_info->offset, true);
3091 }
3092
3093 /* If we found the virtual table pointer, lookup the target. */
3094 if (t)
3095 {
3096 tree vtable;
3097 unsigned HOST_WIDE_INT offset;
3098 if (vtable_pointer_value_to_vtable (t, &vtable, &offset))
3099 {
3100 bool can_refer;
3101 target = gimple_get_virt_method_for_vtable (ie->indirect_info->otr_token,
3102 vtable, offset, &can_refer);
3103 if (can_refer)
3104 {
3105 if (!target
3106 || fndecl_built_in_p (target, BUILT_IN_UNREACHABLE)
3107 || !possible_polymorphic_call_target_p
3108 (ie, cgraph_node::get (target)))
3109 {
3110 /* Do not speculate builtin_unreachable, it is stupid! */
3111 if (ie->indirect_info->vptr_changed)
3112 return NULL;
3113 target = ipa_impossible_devirt_target (ie, target);
3114 }
3115 *speculative = ie->indirect_info->vptr_changed;
3116 if (!*speculative)
3117 return target;
3118 }
3119 }
3120 }
3121
3122 /* Do we know the constant value of the pointer? */
3123 if (!t && (unsigned) param_index < known_csts.length ())
3124 t = known_csts[param_index];
3125
3126 gcc_checking_assert (!t || TREE_CODE (t) != TREE_BINFO);
3127
3128 ipa_polymorphic_call_context context;
3129 if (known_contexts.length () > (unsigned int) param_index)
3130 {
3131 context = known_contexts[param_index];
3132 context.offset_by (anc_offset);
3133 if (ie->indirect_info->vptr_changed)
3134 context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
3135 ie->indirect_info->otr_type);
3136 if (t)
3137 {
3138 ipa_polymorphic_call_context ctx2 = ipa_polymorphic_call_context
3139 (t, ie->indirect_info->otr_type, anc_offset);
3140 if (!ctx2.useless_p ())
3141 context.combine_with (ctx2, ie->indirect_info->otr_type);
3142 }
3143 }
3144 else if (t)
3145 {
3146 context = ipa_polymorphic_call_context (t, ie->indirect_info->otr_type,
3147 anc_offset);
3148 if (ie->indirect_info->vptr_changed)
3149 context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
3150 ie->indirect_info->otr_type);
3151 }
3152 else
3153 return NULL_TREE;
3154
3155 vec<cgraph_node *> targets;
3156 bool final;
3157
3158 targets = possible_polymorphic_call_targets
3159 (ie->indirect_info->otr_type,
3160 ie->indirect_info->otr_token,
3161 context, &final);
3162 if (!final || targets.length () > 1)
3163 {
3164 struct cgraph_node *node;
3165 if (*speculative)
3166 return target;
3167 if (!opt_for_fn (ie->caller->decl, flag_devirtualize_speculatively)
3168 || ie->speculative || !ie->maybe_hot_p ())
3169 return NULL;
3170 node = try_speculative_devirtualization (ie->indirect_info->otr_type,
3171 ie->indirect_info->otr_token,
3172 context);
3173 if (node)
3174 {
3175 *speculative = true;
3176 target = node->decl;
3177 }
3178 else
3179 return NULL;
3180 }
3181 else
3182 {
3183 *speculative = false;
3184 if (targets.length () == 1)
3185 target = targets[0]->decl;
3186 else
3187 target = ipa_impossible_devirt_target (ie, NULL_TREE);
3188 }
3189
3190 if (target && !possible_polymorphic_call_target_p (ie,
3191 cgraph_node::get (target)))
3192 {
3193 if (*speculative)
3194 return NULL;
3195 target = ipa_impossible_devirt_target (ie, target);
3196 }
3197
3198 return target;
3199 }
3200
3201 /* If an indirect edge IE can be turned into a direct one based on data in
3202 AVALS, return the destination. Store into *SPECULATIVE a boolean determining
3203 whether the discovered target is only a speculative guess. */
3204
3205 tree
3206 ipa_get_indirect_edge_target (struct cgraph_edge *ie,
3207 ipa_call_arg_values *avals,
3208 bool *speculative)
3209 {
3210 return ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
3211 avals->m_known_contexts,
3212 avals->m_known_aggs,
3213 NULL, speculative);
3214 }
3215
3216 /* The same functionality as above, overloaded for ipa_auto_call_arg_values. */
3217
3218 tree
3219 ipa_get_indirect_edge_target (struct cgraph_edge *ie,
3220 ipa_auto_call_arg_values *avals,
3221 bool *speculative)
3222 {
3223 return ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
3224 avals->m_known_contexts,
3225 avals->m_known_aggs,
3226 NULL, speculative);
3227 }
3228
3229 /* Calculate devirtualization time bonus for NODE, assuming we know information
3230 about arguments stored in AVALS. */
3231
3232 static int
3233 devirtualization_time_bonus (struct cgraph_node *node,
3234 ipa_auto_call_arg_values *avals)
3235 {
3236 struct cgraph_edge *ie;
3237 int res = 0;
3238
3239 for (ie = node->indirect_calls; ie; ie = ie->next_callee)
3240 {
3241 struct cgraph_node *callee;
3242 class ipa_fn_summary *isummary;
3243 enum availability avail;
3244 tree target;
3245 bool speculative;
3246
3247 target = ipa_get_indirect_edge_target (ie, avals, &speculative);
3248 if (!target)
3249 continue;
3250
3251 /* Only bare minimum benefit for clearly un-inlineable targets. */
3252 res += 1;
3253 callee = cgraph_node::get (target);
3254 if (!callee || !callee->definition)
3255 continue;
3256 callee = callee->function_symbol (&avail);
3257 if (avail < AVAIL_AVAILABLE)
3258 continue;
3259 isummary = ipa_fn_summaries->get (callee);
3260 if (!isummary || !isummary->inlinable)
3261 continue;
3262
3263 int size = ipa_size_summaries->get (callee)->size;
3264 /* FIXME: The values below need re-considering and perhaps also
3265 integrating into the cost metrics, at least in some very basic way. */
3266 int max_inline_insns_auto
3267 = opt_for_fn (callee->decl, param_max_inline_insns_auto);
3268 if (size <= max_inline_insns_auto / 4)
3269 res += 31 / ((int)speculative + 1);
3270 else if (size <= max_inline_insns_auto / 2)
3271 res += 15 / ((int)speculative + 1);
3272 else if (size <= max_inline_insns_auto
3273 || DECL_DECLARED_INLINE_P (callee->decl))
3274 res += 7 / ((int)speculative + 1);
3275 }
3276
3277 return res;
3278 }
3279
3280 /* Return time bonus incurred because of hints stored in ESTIMATES. */
3281
3282 static int
3283 hint_time_bonus (cgraph_node *node, const ipa_call_estimates &estimates)
3284 {
3285 int result = 0;
3286 ipa_hints hints = estimates.hints;
3287 if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride))
3288 result += opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3289
3290 sreal bonus_for_one = opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3291
3292 if (hints & INLINE_HINT_loop_iterations)
3293 result += (estimates.loops_with_known_iterations * bonus_for_one).to_int ();
3294
3295 if (hints & INLINE_HINT_loop_stride)
3296 result += (estimates.loops_with_known_strides * bonus_for_one).to_int ();
3297
3298 return result;
3299 }
3300
3301 /* If there is a reason to penalize the function described by INFO in the
3302 cloning goodness evaluation, do so. */
3303
3304 static inline sreal
3305 incorporate_penalties (cgraph_node *node, ipa_node_params *info,
3306 sreal evaluation)
3307 {
3308 if (info->node_within_scc && !info->node_is_self_scc)
3309 evaluation = (evaluation
3310 * (100 - opt_for_fn (node->decl,
3311 param_ipa_cp_recursion_penalty))) / 100;
3312
3313 if (info->node_calling_single_call)
3314 evaluation = (evaluation
3315 * (100 - opt_for_fn (node->decl,
3316 param_ipa_cp_single_call_penalty)))
3317 / 100;
3318
3319 return evaluation;
3320 }
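/* A worked example with hypothetical numbers: an evaluation of 1000 for a node
in a non-trivial SCC with param_ipa_cp_recursion_penalty of 40 becomes
1000 * (100 - 40) / 100 = 600; if node_calling_single_call is also set,
param_ipa_cp_single_call_penalty is applied on top in the same way. */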
3321
3322 /* Return true if cloning NODE is a good idea, given the estimated TIME_BENEFIT
3323 and SIZE_COST, the sum of frequencies of incoming edges to the potential new
3324 clone in FREQ_SUM and their profile counts in COUNT_SUM. */
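/* A worked sketch with hypothetical inputs for the profile-less path below:
time_benefit 4.0, freq_sum 2.0 and size_cost 8 give an evaluation of
(4.0 * 2.0 / 8) * 1000 = 1000 ignoring penalties, which passes a
param_ipa_cp_eval_threshold of, say, 500. */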
3325
3326 static bool
3327 good_cloning_opportunity_p (struct cgraph_node *node, sreal time_benefit,
3328 sreal freq_sum, profile_count count_sum,
3329 int size_cost)
3330 {
3331 if (time_benefit == 0
3332 || !opt_for_fn (node->decl, flag_ipa_cp_clone)
3333 || node->optimize_for_size_p ())
3334 return false;
3335
3336 gcc_assert (size_cost > 0);
3337
3338 ipa_node_params *info = ipa_node_params_sum->get (node);
3339 int eval_threshold = opt_for_fn (node->decl, param_ipa_cp_eval_threshold);
3340 if (count_sum > profile_count::zero ())
3341 {
3342 gcc_assert (base_count > profile_count::zero ());
3343 sreal factor = count_sum.probability_in (base_count).to_sreal ();
3344 sreal evaluation = (time_benefit * factor) / size_cost;
3345 evaluation = incorporate_penalties (node, info, evaluation);
3346 evaluation *= 1000;
3347
3348 if (dump_file && (dump_flags & TDF_DETAILS))
3349 {
3350 fprintf (dump_file, " good_cloning_opportunity_p (time: %g, "
3351 "size: %i, count_sum: ", time_benefit.to_double (),
3352 size_cost);
3353 count_sum.dump (dump_file);
3354 fprintf (dump_file, "%s%s) -> evaluation: %.2f, threshold: %i\n",
3355 info->node_within_scc
3356 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
3357 info->node_calling_single_call ? ", single_call" : "",
3358 evaluation.to_double (), eval_threshold);
3359 }
3360
3361 return evaluation.to_int () >= eval_threshold;
3362 }
3363 else
3364 {
3365 sreal evaluation = (time_benefit * freq_sum) / size_cost;
3366 evaluation = incorporate_penalties (node, info, evaluation);
3367 evaluation *= 1000;
3368
3369 if (dump_file && (dump_flags & TDF_DETAILS))
3370 fprintf (dump_file, " good_cloning_opportunity_p (time: %g, "
3371 "size: %i, freq_sum: %g%s%s) -> evaluation: %.2f, "
3372 "threshold: %i\n",
3373 time_benefit.to_double (), size_cost, freq_sum.to_double (),
3374 info->node_within_scc
3375 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
3376 info->node_calling_single_call ? ", single_call" : "",
3377 evaluation.to_double (), eval_threshold);
3378
3379 return evaluation.to_int () >= eval_threshold;
3380 }
3381 }
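
/* A simplified model (illustration only, not GCC code) of the
   frequency-based branch above: the evaluation is
   time_benefit * freq_sum / size_cost, reduced by the SCC and single-call
   penalties, scaled by 1000 and compared with param_ipa_cp_eval_threshold.
   Plain doubles stand in for sreal and the penalty percentages are
   hypothetical example values.  */

static bool
example_good_cloning_opportunity (double time_benefit, double freq_sum,
				  int size_cost, int eval_threshold,
				  bool within_scc, bool single_call)
{
  double evaluation = time_benefit * freq_sum / size_cost;
  if (within_scc)
    evaluation = evaluation * (100 - 40) / 100;	/* Example recursion penalty.  */
  if (single_call)
    evaluation = evaluation * (100 - 15) / 100;	/* Example single-call penalty.  */
  return evaluation * 1000.0 >= eval_threshold;
}
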
3382
3383 /* Return all context independent values from aggregate lattices in PLATS in a
3384 vector.  Return an empty vector if there are none.  */
3385
3386 static vec<ipa_agg_value>
3387 context_independent_aggregate_values (class ipcp_param_lattices *plats)
3388 {
3389 vec<ipa_agg_value> res = vNULL;
3390
3391 if (plats->aggs_bottom
3392 || plats->aggs_contain_variable
3393 || plats->aggs_count == 0)
3394 return vNULL;
3395
3396 for (struct ipcp_agg_lattice *aglat = plats->aggs;
3397 aglat;
3398 aglat = aglat->next)
3399 if (aglat->is_single_const ())
3400 {
3401 struct ipa_agg_value item;
3402 item.offset = aglat->offset;
3403 item.value = aglat->values->value;
3404 res.safe_push (item);
3405 }
3406 return res;
3407 }
3408
3409 /* Grow vectors in AVALS and fill them with information about values of
3410 parameters that are known to be independent of the context. Only calculate
3411 m_known_aggs if CALCULATE_AGGS is true. INFO describes the function. If
3412 REMOVABLE_PARAMS_COST is non-NULL, the movement cost of all removable
3413 parameters will be stored in it.
3414
3415 TODO: Also grow context independent value range vectors. */
3416
3417 static bool
3418 gather_context_independent_values (class ipa_node_params *info,
3419 ipa_auto_call_arg_values *avals,
3420 bool calculate_aggs,
3421 int *removable_params_cost)
3422 {
3423 int i, count = ipa_get_param_count (info);
3424 bool ret = false;
3425
3426 avals->m_known_vals.safe_grow_cleared (count, true);
3427 avals->m_known_contexts.safe_grow_cleared (count, true);
3428 if (calculate_aggs)
3429 avals->m_known_aggs.safe_grow_cleared (count, true);
3430
3431 if (removable_params_cost)
3432 *removable_params_cost = 0;
3433
3434 for (i = 0; i < count; i++)
3435 {
3436 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3437 ipcp_lattice<tree> *lat = &plats->itself;
3438
3439 if (lat->is_single_const ())
3440 {
3441 ipcp_value<tree> *val = lat->values;
3442 gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
3443 avals->m_known_vals[i] = val->value;
3444 if (removable_params_cost)
3445 *removable_params_cost
3446 += estimate_move_cost (TREE_TYPE (val->value), false);
3447 ret = true;
3448 }
3449 else if (removable_params_cost
3450 && !ipa_is_param_used (info, i))
3451 *removable_params_cost
3452 += ipa_get_param_move_cost (info, i);
3453
3454 if (!ipa_is_param_used (info, i))
3455 continue;
3456
3457 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3458 /* Do not account known context as reason for cloning. We can see
3459 if it permits devirtualization. */
3460 if (ctxlat->is_single_const ())
3461 avals->m_known_contexts[i] = ctxlat->values->value;
3462
3463 if (calculate_aggs)
3464 {
3465 vec<ipa_agg_value> agg_items;
3466 struct ipa_agg_value_set *agg;
3467
3468 agg_items = context_independent_aggregate_values (plats);
3469 agg = &avals->m_known_aggs[i];
3470 agg->items = agg_items;
3471 agg->by_ref = plats->aggs_by_ref;
3472 ret |= !agg_items.is_empty ();
3473 }
3474 }
3475
3476 return ret;
3477 }
3478
3479 /* Perform time and size measurement of NODE with the context given in AVALS,
3480 calculate the benefit compared to the node without specialization and store
3481 it into VAL. Take into account REMOVABLE_PARAMS_COST of all
3482 context-independent or unused removable parameters and EST_MOVE_COST, the
3483 estimated movement of the considered parameter. */
3484
3485 static void
3486 perform_estimation_of_a_value (cgraph_node *node,
3487 ipa_auto_call_arg_values *avals,
3488 int removable_params_cost, int est_move_cost,
3489 ipcp_value_base *val)
3490 {
3491 sreal time_benefit;
3492 ipa_call_estimates estimates;
3493
3494 estimate_ipcp_clone_size_and_time (node, avals, &estimates);
3495
3496 /* Extern inline functions have no cloning local time benefits because they
3497 will be inlined anyway. The only reason to clone them is if it enables
3498 optimization in any of the functions they call. */
3499 if (DECL_EXTERNAL (node->decl) && DECL_DECLARED_INLINE_P (node->decl))
3500 time_benefit = 0;
3501 else
3502 time_benefit = (estimates.nonspecialized_time - estimates.time)
3503 + (devirtualization_time_bonus (node, avals)
3504 + hint_time_bonus (node, estimates)
3505 + removable_params_cost + est_move_cost);
3506
3507 int size = estimates.size;
3508 gcc_checking_assert (size >= 0);
3509 /* The inliner-heuristics based estimates may think that in certain
3510 contexts some functions do not have any size at all but we want
3511 all specializations to have at least a tiny cost, if nothing else
3512 to prevent division by zero.  */
3513 if (size == 0)
3514 size = 1;
3515
3516 val->local_time_benefit = time_benefit;
3517 val->local_size_cost = size;
3518 }
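
/* The local time benefit computed above, written as one formula
   (illustration only, plain doubles instead of sreal): the time saved by
   specialization plus the devirtualization and hint bonuses plus the cost
   of passing parameters the clone no longer needs.  */

static double
example_local_time_benefit (double nonspecialized_time,
			    double specialized_time,
			    int devirt_bonus, int hint_bonus,
			    int removable_params_cost, int est_move_cost)
{
  return (nonspecialized_time - specialized_time)
	 + devirt_bonus + hint_bonus
	 + removable_params_cost + est_move_cost;
}
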
3519
3520 /* Get the overall limit on unit growth based on the growth parameters of
3521 NODE.  It does not really make sense to mix functions with different
3522 overall growth limits, but it is possible, and if it happens, we do not
3523 want to select one limit at random.  */
3524
3525 static long
3526 get_max_overall_size (cgraph_node *node)
3527 {
3528 long max_new_size = orig_overall_size;
3529 long large_unit = opt_for_fn (node->decl, param_ipa_cp_large_unit_insns);
3530 if (max_new_size < large_unit)
3531 max_new_size = large_unit;
3532 int unit_growth = opt_for_fn (node->decl, param_ipa_cp_unit_growth);
3533 max_new_size += max_new_size * unit_growth / 100 + 1;
3534 return max_new_size;
3535 }
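
/* The growth-limit formula above, worked through (illustration only): with
   an original unit size of 50000, a large-unit threshold of 16000 and a
   unit growth of 10 percent, the limit is 50000 + 50000*10/100 + 1 = 55001.  */

static long
example_max_overall_size (long orig_size, long large_unit, int growth_percent)
{
  long max_new_size = orig_size < large_unit ? large_unit : orig_size;
  return max_new_size + max_new_size * growth_percent / 100 + 1;
}
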
3536
3537 /* Iterate over known values of parameters of NODE and estimate the local
3538 effects in terms of time and size they have. */
3539
3540 static void
3541 estimate_local_effects (struct cgraph_node *node)
3542 {
3543 ipa_node_params *info = ipa_node_params_sum->get (node);
3544 int i, count = ipa_get_param_count (info);
3545 bool always_const;
3546 int removable_params_cost;
3547
3548 if (!count || !ipcp_versionable_function_p (node))
3549 return;
3550
3551 if (dump_file && (dump_flags & TDF_DETAILS))
3552 fprintf (dump_file, "\nEstimating effects for %s.\n", node->dump_name ());
3553
3554 ipa_auto_call_arg_values avals;
3555 always_const = gather_context_independent_values (info, &avals, true,
3556 &removable_params_cost);
3557 int devirt_bonus = devirtualization_time_bonus (node, &avals);
3558 if (always_const || devirt_bonus
3559 || (removable_params_cost && node->can_change_signature))
3560 {
3561 struct caller_statistics stats;
3562 ipa_call_estimates estimates;
3563
3564 init_caller_stats (&stats);
3565 node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
3566 false);
3567 estimate_ipcp_clone_size_and_time (node, &avals, &estimates);
3568 sreal time = estimates.nonspecialized_time - estimates.time;
3569 time += devirt_bonus;
3570 time += hint_time_bonus (node, estimates);
3571 time += removable_params_cost;
3572 int size = estimates.size - stats.n_calls * removable_params_cost;
3573
3574 if (dump_file)
3575 fprintf (dump_file, " - context independent values, size: %i, "
3576 "time_benefit: %f\n", size, (time).to_double ());
3577
3578 if (size <= 0 || node->local)
3579 {
3580 info->do_clone_for_all_contexts = true;
3581
3582 if (dump_file)
3583 fprintf (dump_file, " Decided to specialize for all "
3584 "known contexts, code not going to grow.\n");
3585 }
3586 else if (good_cloning_opportunity_p (node, time, stats.freq_sum,
3587 stats.count_sum, size))
3588 {
3589 if (size + overall_size <= get_max_overall_size (node))
3590 {
3591 info->do_clone_for_all_contexts = true;
3592 overall_size += size;
3593
3594 if (dump_file)
3595 fprintf (dump_file, " Decided to specialize for all "
3596 "known contexts, growth (to %li) deemed "
3597 "beneficial.\n", overall_size);
3598 }
3599 else if (dump_file && (dump_flags & TDF_DETAILS))
3600 fprintf (dump_file, " Not cloning for all contexts because "
3601 "maximum unit size would be reached with %li.\n",
3602 size + overall_size);
3603 }
3604 else if (dump_file && (dump_flags & TDF_DETAILS))
3605 fprintf (dump_file, " Not cloning for all contexts because "
3606 "!good_cloning_opportunity_p.\n");
3607
3608 }
3609
3610 for (i = 0; i < count; i++)
3611 {
3612 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3613 ipcp_lattice<tree> *lat = &plats->itself;
3614 ipcp_value<tree> *val;
3615
3616 if (lat->bottom
3617 || !lat->values
3618 || avals.m_known_vals[i])
3619 continue;
3620
3621 for (val = lat->values; val; val = val->next)
3622 {
3623 gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
3624 avals.m_known_vals[i] = val->value;
3625
3626 int emc = estimate_move_cost (TREE_TYPE (val->value), true);
3627 perform_estimation_of_a_value (node, &avals, removable_params_cost,
3628 emc, val);
3629
3630 if (dump_file && (dump_flags & TDF_DETAILS))
3631 {
3632 fprintf (dump_file, " - estimates for value ");
3633 print_ipcp_constant_value (dump_file, val->value);
3634 fprintf (dump_file, " for ");
3635 ipa_dump_param (dump_file, info, i);
3636 fprintf (dump_file, ": time_benefit: %g, size: %i\n",
3637 val->local_time_benefit.to_double (),
3638 val->local_size_cost);
3639 }
3640 }
3641 avals.m_known_vals[i] = NULL_TREE;
3642 }
3643
3644 for (i = 0; i < count; i++)
3645 {
3646 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3647
3648 if (!plats->virt_call)
3649 continue;
3650
3651 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3652 ipcp_value<ipa_polymorphic_call_context> *val;
3653
3654 if (ctxlat->bottom
3655 || !ctxlat->values
3656 || !avals.m_known_contexts[i].useless_p ())
3657 continue;
3658
3659 for (val = ctxlat->values; val; val = val->next)
3660 {
3661 avals.m_known_contexts[i] = val->value;
3662 perform_estimation_of_a_value (node, &avals, removable_params_cost,
3663 0, val);
3664
3665 if (dump_file && (dump_flags & TDF_DETAILS))
3666 {
3667 fprintf (dump_file, " - estimates for polymorphic context ");
3668 print_ipcp_constant_value (dump_file, val->value);
3669 fprintf (dump_file, " for ");
3670 ipa_dump_param (dump_file, info, i);
3671 fprintf (dump_file, ": time_benefit: %g, size: %i\n",
3672 val->local_time_benefit.to_double (),
3673 val->local_size_cost);
3674 }
3675 }
3676 avals.m_known_contexts[i] = ipa_polymorphic_call_context ();
3677 }
3678
3679 for (i = 0; i < count; i++)
3680 {
3681 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3682
3683 if (plats->aggs_bottom || !plats->aggs)
3684 continue;
3685
3686 ipa_agg_value_set *agg = &avals.m_known_aggs[i];
3687 for (ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
3688 {
3689 ipcp_value<tree> *val;
3690 if (aglat->bottom || !aglat->values
3691 /* If the following is true, the one value is in known_aggs. */
3692 || (!plats->aggs_contain_variable
3693 && aglat->is_single_const ()))
3694 continue;
3695
3696 for (val = aglat->values; val; val = val->next)
3697 {
3698 struct ipa_agg_value item;
3699
3700 item.offset = aglat->offset;
3701 item.value = val->value;
3702 agg->items.safe_push (item);
3703
3704 perform_estimation_of_a_value (node, &avals,
3705 removable_params_cost, 0, val);
3706
3707 if (dump_file && (dump_flags & TDF_DETAILS))
3708 {
3709 fprintf (dump_file, " - estimates for value ");
3710 print_ipcp_constant_value (dump_file, val->value);
3711 fprintf (dump_file, " for ");
3712 ipa_dump_param (dump_file, info, i);
3713 fprintf (dump_file, "[%soffset: " HOST_WIDE_INT_PRINT_DEC
3714 "]: time_benefit: %g, size: %i\n",
3715 plats->aggs_by_ref ? "ref " : "",
3716 aglat->offset,
3717 val->local_time_benefit.to_double (),
3718 val->local_size_cost);
3719 }
3720
3721 agg->items.pop ();
3722 }
3723 }
3724 }
3725 }
3726
3727
3728 /* Add value CUR_VAL and all yet-unsorted values it is dependent on to the
3729 topological sort of values. */
3730
3731 template <typename valtype>
3732 void
3733 value_topo_info<valtype>::add_val (ipcp_value<valtype> *cur_val)
3734 {
3735 ipcp_value_source<valtype> *src;
3736
3737 if (cur_val->dfs)
3738 return;
3739
3740 dfs_counter++;
3741 cur_val->dfs = dfs_counter;
3742 cur_val->low_link = dfs_counter;
3743
3744 cur_val->topo_next = stack;
3745 stack = cur_val;
3746 cur_val->on_stack = true;
3747
3748 for (src = cur_val->sources; src; src = src->next)
3749 if (src->val)
3750 {
3751 if (src->val->dfs == 0)
3752 {
3753 add_val (src->val);
3754 if (src->val->low_link < cur_val->low_link)
3755 cur_val->low_link = src->val->low_link;
3756 }
3757 else if (src->val->on_stack
3758 && src->val->dfs < cur_val->low_link)
3759 cur_val->low_link = src->val->dfs;
3760 }
3761
3762 if (cur_val->dfs == cur_val->low_link)
3763 {
3764 ipcp_value<valtype> *v, *scc_list = NULL;
3765
3766 do
3767 {
3768 v = stack;
3769 stack = v->topo_next;
3770 v->on_stack = false;
3771 v->scc_no = cur_val->dfs;
3772
3773 v->scc_next = scc_list;
3774 scc_list = v;
3775 }
3776 while (v != cur_val);
3777
3778 cur_val->topo_next = values_topo;
3779 values_topo = cur_val;
3780 }
3781 }
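
/* An illustrative, self-contained model of the Tarjan-style SCC discovery
   that add_val implements above (toy types, not part of GCC): each value
   gets a DFS number, low_link tracks the smallest DFS number reachable
   through sources, and a value whose dfs == low_link closes an SCC, which
   is then prepended to the topological order.  */

struct toy_val
{
  toy_val **sources = nullptr;	/* NULL-terminated array of dependencies.  */
  int dfs = 0, low_link = 0, scc_no = 0;
  bool on_stack = false;
  toy_val *topo_next = nullptr, *scc_next = nullptr;
};

struct toy_topo
{
  int dfs_counter = 0;
  toy_val *stack = nullptr;
  toy_val *values_topo = nullptr;

  void add_val (toy_val *cur)
  {
    if (cur->dfs)
      return;

    cur->dfs = cur->low_link = ++dfs_counter;
    cur->topo_next = stack;
    stack = cur;
    cur->on_stack = true;

    if (cur->sources)
      for (toy_val **src = cur->sources; *src; src++)
	if ((*src)->dfs == 0)
	  {
	    add_val (*src);
	    if ((*src)->low_link < cur->low_link)
	      cur->low_link = (*src)->low_link;
	  }
	else if ((*src)->on_stack && (*src)->dfs < cur->low_link)
	  cur->low_link = (*src)->dfs;

    if (cur->dfs == cur->low_link)	/* CUR is the root of an SCC.  */
      {
	toy_val *v, *scc_list = nullptr;
	do
	  {
	    v = stack;
	    stack = v->topo_next;
	    v->on_stack = false;
	    v->scc_no = cur->dfs;
	    v->scc_next = scc_list;
	    scc_list = v;
	  }
	while (v != cur);
	cur->topo_next = values_topo;	/* Prepend the SCC to the order.  */
	values_topo = cur;
      }
  }
};
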
3782
3783 /* Add all values in lattices associated with NODE to the topological sort if
3784 they are not there yet. */
3785
3786 static void
3787 add_all_node_vals_to_toposort (cgraph_node *node, ipa_topo_info *topo)
3788 {
3789 ipa_node_params *info = ipa_node_params_sum->get (node);
3790 int i, count = ipa_get_param_count (info);
3791
3792 for (i = 0; i < count; i++)
3793 {
3794 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3795 ipcp_lattice<tree> *lat = &plats->itself;
3796 struct ipcp_agg_lattice *aglat;
3797
3798 if (!lat->bottom)
3799 {
3800 ipcp_value<tree> *val;
3801 for (val = lat->values; val; val = val->next)
3802 topo->constants.add_val (val);
3803 }
3804
3805 if (!plats->aggs_bottom)
3806 for (aglat = plats->aggs; aglat; aglat = aglat->next)
3807 if (!aglat->bottom)
3808 {
3809 ipcp_value<tree> *val;
3810 for (val = aglat->values; val; val = val->next)
3811 topo->constants.add_val (val);
3812 }
3813
3814 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3815 if (!ctxlat->bottom)
3816 {
3817 ipcp_value<ipa_polymorphic_call_context> *ctxval;
3818 for (ctxval = ctxlat->values; ctxval; ctxval = ctxval->next)
3819 topo->contexts.add_val (ctxval);
3820 }
3821 }
3822 }
3823
3824 /* One pass of constant propagation along the call graph edges, from callers
3825 to callees (requires topological ordering in TOPO), iterating over strongly
3826 connected components.  */
3827
3828 static void
3829 propagate_constants_topo (class ipa_topo_info *topo)
3830 {
3831 int i;
3832
3833 for (i = topo->nnodes - 1; i >= 0; i--)
3834 {
3835 unsigned j;
3836 struct cgraph_node *v, *node = topo->order[i];
3837 vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (node);
3838
3839 /* First, iteratively propagate within the strongly connected component
3840 until all lattices stabilize. */
3841 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3842 if (v->has_gimple_body_p ())
3843 {
3844 if (opt_for_fn (v->decl, flag_ipa_cp)
3845 && opt_for_fn (v->decl, optimize))
3846 push_node_to_stack (topo, v);
3847 /* When V is not optimized, we cannot push it onto the stack, but
3848 we still need to set all its callees' lattices to bottom.  */
3849 else
3850 {
3851 for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
3852 propagate_constants_across_call (cs);
3853 }
3854 }
3855
3856 v = pop_node_from_stack (topo);
3857 while (v)
3858 {
3859 struct cgraph_edge *cs;
3860 class ipa_node_params *info = NULL;
3861 bool self_scc = true;
3862
3863 for (cs = v->callees; cs; cs = cs->next_callee)
3864 if (ipa_edge_within_scc (cs))
3865 {
3866 cgraph_node *callee = cs->callee->function_symbol ();
3867
3868 if (v != callee)
3869 self_scc = false;
3870
3871 if (!info)
3872 {
3873 info = ipa_node_params_sum->get (v);
3874 info->node_within_scc = true;
3875 }
3876
3877 if (propagate_constants_across_call (cs))
3878 push_node_to_stack (topo, callee);
3879 }
3880
3881 if (info)
3882 info->node_is_self_scc = self_scc;
3883
3884 v = pop_node_from_stack (topo);
3885 }
3886
3887 /* Afterwards, propagate along edges leading out of the SCC, calculate
3888 the local effects of the discovered constants and add all valid values
3889 to their topological sort.  */
3890 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3891 if (v->has_gimple_body_p ()
3892 && opt_for_fn (v->decl, flag_ipa_cp)
3893 && opt_for_fn (v->decl, optimize))
3894 {
3895 struct cgraph_edge *cs;
3896
3897 estimate_local_effects (v);
3898 add_all_node_vals_to_toposort (v, topo);
3899 for (cs = v->callees; cs; cs = cs->next_callee)
3900 if (!ipa_edge_within_scc (cs))
3901 propagate_constants_across_call (cs);
3902 }
3903 cycle_nodes.release ();
3904 }
3905 }
3906
3907 /* Propagate the estimated effects of individual values along the topological
3908 order from the dependent values to those they depend on.  */
3909
3910 template <typename valtype>
3911 void
3912 value_topo_info<valtype>::propagate_effects ()
3913 {
3914 ipcp_value<valtype> *base;
3915 hash_set<ipcp_value<valtype> *> processed_srcvals;
3916
3917 for (base = values_topo; base; base = base->topo_next)
3918 {
3919 ipcp_value_source<valtype> *src;
3920 ipcp_value<valtype> *val;
3921 sreal time = 0;
3922 HOST_WIDE_INT size = 0;
3923
3924 for (val = base; val; val = val->scc_next)
3925 {
3926 time = time + val->local_time_benefit + val->prop_time_benefit;
3927 size = size + val->local_size_cost + val->prop_size_cost;
3928 }
3929
3930 for (val = base; val; val = val->scc_next)
3931 {
3932 processed_srcvals.empty ();
3933 for (src = val->sources; src; src = src->next)
3934 if (src->val
3935 && src->cs->maybe_hot_p ())
3936 {
3937 if (!processed_srcvals.add (src->val))
3938 {
3939 HOST_WIDE_INT prop_size = size + src->val->prop_size_cost;
3940 if (prop_size < INT_MAX)
3941 src->val->prop_size_cost = prop_size;
3942 else
3943 continue;
3944 }
3945
3946 int special_factor = 1;
3947 if (val->same_scc (src->val))
3948 special_factor
3949 = opt_for_fn(src->cs->caller->decl,
3950 param_ipa_cp_recursive_freq_factor);
3951 else if (val->self_recursion_generated_p ()
3952 && (src->cs->callee->function_symbol ()
3953 == src->cs->caller))
3954 {
3955 int max_recur_gen_depth
3956 = opt_for_fn(src->cs->caller->decl,
3957 param_ipa_cp_max_recursive_depth);
3958 special_factor = max_recur_gen_depth
3959 - val->self_recursion_generated_level + 1;
3960 }
3961
3962 src->val->prop_time_benefit
3963 += time * special_factor * src->cs->sreal_frequency ();
3964 }
3965
3966 if (size < INT_MAX)
3967 {
3968 val->prop_time_benefit = time;
3969 val->prop_size_cost = size;
3970 }
3971 else
3972 {
3973 val->prop_time_benefit = 0;
3974 val->prop_size_cost = 0;
3975 }
3976 }
3977 }
3978 }
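
/* One step of the propagation above, modeled with plain types (illustration
   only): the time and size of a whole SCC are added to each source value
   that brings it about, with the time scaled by the frequency of the call
   edge and by the special factor computed for recursion.  */

static void
example_propagate_to_source (double scc_time, long scc_size,
			     double edge_frequency, int special_factor,
			     double *src_prop_time, long *src_prop_size)
{
  *src_prop_size += scc_size;	/* Capped at INT_MAX in the real code.  */
  *src_prop_time += scc_time * special_factor * edge_frequency;
}
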
3979
3980 /* Callback for qsort to sort counts of all edges. */
3981
3982 static int
3983 compare_edge_profile_counts (const void *a, const void *b)
3984 {
3985 const profile_count *cnt1 = (const profile_count *) a;
3986 const profile_count *cnt2 = (const profile_count *) b;
3987
3988 if (*cnt1 < *cnt2)
3989 return 1;
3990 if (*cnt1 > *cnt2)
3991 return -1;
3992 return 0;
3993 }
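
/* A sketch (not GCC code) of how ipcp_propagate_stage below uses such a
   comparator: sort all interesting edge counts in descending order and pick
   the element PERCENT percent of the way in as base_count.  Plain longs
   stand in for profile_count.  */

#include <stdlib.h>

static int
example_cmp_desc (const void *a, const void *b)
{
  long x = *(const long *) a, y = *(const long *) b;
  return (x < y) - (x > y);	/* Descending order.  */
}

static long
example_select_base_count (long *counts, unsigned n, unsigned percent)
{
  qsort (counts, n, sizeof (long), example_cmp_desc);
  return counts[n * percent / 100];  /* Caller ensures n > 0, percent <= 100.  */
}
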
3994
3995
3996 /* Propagate constants, polymorphic contexts and their effects from the
3997 summaries interprocedurally. */
3998
3999 static void
4000 ipcp_propagate_stage (class ipa_topo_info *topo)
4001 {
4002 struct cgraph_node *node;
4003
4004 if (dump_file)
4005 fprintf (dump_file, "\n Propagating constants:\n\n");
4006
4007 base_count = profile_count::uninitialized ();
4008
4009 bool compute_count_base = false;
4010 unsigned base_count_pos_percent = 0;
4011 FOR_EACH_DEFINED_FUNCTION (node)
4012 {
4013 if (node->has_gimple_body_p ()
4014 && opt_for_fn (node->decl, flag_ipa_cp)
4015 && opt_for_fn (node->decl, optimize))
4016 {
4017 ipa_node_params *info = ipa_node_params_sum->get (node);
4018 determine_versionability (node, info);
4019
4020 unsigned nlattices = ipa_get_param_count (info);
4021 void *chunk = XCNEWVEC (class ipcp_param_lattices, nlattices);
4022 info->lattices = new (chunk) ipcp_param_lattices[nlattices];
4023 initialize_node_lattices (node);
4024 }
4025 ipa_size_summary *s = ipa_size_summaries->get (node);
4026 if (node->definition && !node->alias && s != NULL)
4027 overall_size += s->self_size;
4028 if (node->count.ipa ().initialized_p ())
4029 {
4030 compute_count_base = true;
4031 unsigned pos_percent = opt_for_fn (node->decl,
4032 param_ipa_cp_profile_count_base);
4033 base_count_pos_percent = MAX (base_count_pos_percent, pos_percent);
4034 }
4035 }
4036
4037 if (compute_count_base)
4038 {
4039 auto_vec<profile_count> all_edge_counts;
4040 all_edge_counts.reserve_exact (symtab->edges_count);
4041 FOR_EACH_DEFINED_FUNCTION (node)
4042 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
4043 {
4044 profile_count count = cs->count.ipa ();
4045 if (!(count > profile_count::zero ()))
4046 continue;
4047
4048 enum availability avail;
4049 cgraph_node *tgt
4050 = cs->callee->function_or_virtual_thunk_symbol (&avail);
4051 ipa_node_params *info = ipa_node_params_sum->get (tgt);
4052 if (info && info->versionable)
4053 all_edge_counts.quick_push (count);
4054 }
4055
4056 if (!all_edge_counts.is_empty ())
4057 {
4058 gcc_assert (base_count_pos_percent <= 100);
4059 all_edge_counts.qsort (compare_edge_profile_counts);
4060
4061 unsigned base_count_pos
4062 = ((all_edge_counts.length () * (base_count_pos_percent)) / 100);
4063 base_count = all_edge_counts[base_count_pos];
4064
4065 if (dump_file)
4066 {
4067 fprintf (dump_file, "\nSelected base_count from %u edges at "
4068 "position %u, arriving at: ", all_edge_counts.length (),
4069 base_count_pos);
4070 base_count.dump (dump_file);
4071 fprintf (dump_file, "\n");
4072 }
4073 }
4074 else if (dump_file)
4075 fprintf (dump_file, "\nNo candidates with non-zero call count found, "
4076 "continuing as if without profile feedback.\n");
4077 }
4078
4079 orig_overall_size = overall_size;
4080
4081 if (dump_file)
4082 fprintf (dump_file, "\noverall_size: %li\n", overall_size);
4083
4084 propagate_constants_topo (topo);
4085 if (flag_checking)
4086 ipcp_verify_propagated_values ();
4087 topo->constants.propagate_effects ();
4088 topo->contexts.propagate_effects ();
4089
4090 if (dump_file)
4091 {
4092 fprintf (dump_file, "\nIPA lattices after all propagation:\n");
4093 print_all_lattices (dump_file, (dump_flags & TDF_DETAILS), true);
4094 }
4095 }
4096
4097 /* Discover newly direct outgoing edges from NODE which is a new clone with
4098 known KNOWN_CSTS, KNOWN_CONTEXTS and AGGVALS, and make them direct.  */
4099
4100 static void
4101 ipcp_discover_new_direct_edges (struct cgraph_node *node,
4102 vec<tree> known_csts,
4103 vec<ipa_polymorphic_call_context>
4104 known_contexts,
4105 struct ipa_agg_replacement_value *aggvals)
4106 {
4107 struct cgraph_edge *ie, *next_ie;
4108 bool found = false;
4109
4110 for (ie = node->indirect_calls; ie; ie = next_ie)
4111 {
4112 tree target;
4113 bool speculative;
4114
4115 next_ie = ie->next_callee;
4116 target = ipa_get_indirect_edge_target_1 (ie, known_csts, known_contexts,
4117 vNULL, aggvals, &speculative);
4118 if (target)
4119 {
4120 bool agg_contents = ie->indirect_info->agg_contents;
4121 bool polymorphic = ie->indirect_info->polymorphic;
4122 int param_index = ie->indirect_info->param_index;
4123 struct cgraph_edge *cs = ipa_make_edge_direct_to_target (ie, target,
4124 speculative);
4125 found = true;
4126
4127 if (cs && !agg_contents && !polymorphic)
4128 {
4129 ipa_node_params *info = ipa_node_params_sum->get (node);
4130 int c = ipa_get_controlled_uses (info, param_index);
4131 if (c != IPA_UNDESCRIBED_USE
4132 && !ipa_get_param_load_dereferenced (info, param_index))
4133 {
4134 struct ipa_ref *to_del;
4135
4136 c--;
4137 ipa_set_controlled_uses (info, param_index, c);
4138 if (dump_file && (dump_flags & TDF_DETAILS))
4139 fprintf (dump_file, " controlled uses count of param "
4140 "%i bumped down to %i\n", param_index, c);
4141 if (c == 0
4142 && (to_del = node->find_reference (cs->callee, NULL, 0,
4143 IPA_REF_ADDR)))
4144 {
4145 if (dump_file && (dump_flags & TDF_DETAILS))
4146 fprintf (dump_file, " and even removing its "
4147 "cloning-created reference\n");
4148 to_del->remove_reference ();
4149 }
4150 }
4151 }
4152 }
4153 }
4154 /* Turning indirect calls into direct calls will improve the overall summary.  */
4155 if (found)
4156 ipa_update_overall_fn_summary (node);
4157 }
4158
4159 class edge_clone_summary;
4160 static call_summary <edge_clone_summary *> *edge_clone_summaries = NULL;
4161
4162 /* Edge clone summary. */
4163
4164 class edge_clone_summary
4165 {
4166 public:
4167 /* Default constructor. */
4168 edge_clone_summary (): prev_clone (NULL), next_clone (NULL) {}
4169
4170 /* Default destructor. */
4171 ~edge_clone_summary ()
4172 {
4173 if (prev_clone)
4174 edge_clone_summaries->get (prev_clone)->next_clone = next_clone;
4175 if (next_clone)
4176 edge_clone_summaries->get (next_clone)->prev_clone = prev_clone;
4177 }
4178
4179 cgraph_edge *prev_clone;
4180 cgraph_edge *next_clone;
4181 };
4182
4183 class edge_clone_summary_t:
4184 public call_summary <edge_clone_summary *>
4185 {
4186 public:
4187 edge_clone_summary_t (symbol_table *symtab):
4188 call_summary <edge_clone_summary *> (symtab)
4189 {
4190 m_initialize_when_cloning = true;
4191 }
4192
4193 virtual void duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4194 edge_clone_summary *src_data,
4195 edge_clone_summary *dst_data);
4196 };
4197
4198 /* Edge duplication hook. */
4199
4200 void
4201 edge_clone_summary_t::duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4202 edge_clone_summary *src_data,
4203 edge_clone_summary *dst_data)
4204 {
4205 if (src_data->next_clone)
4206 edge_clone_summaries->get (src_data->next_clone)->prev_clone = dst_edge;
4207 dst_data->prev_clone = src_edge;
4208 dst_data->next_clone = src_data->next_clone;
4209 src_data->next_clone = dst_edge;
4210 }
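
/* The pointer updates above splice DST_EDGE into the doubly-linked list of
   clones right after SRC_EDGE.  A minimal standalone model (hypothetical
   type, not GCC's):  */

struct toy_link
{
  toy_link *prev = nullptr, *next = nullptr;
};

static void
example_splice_after (toy_link *src, toy_link *dst)
{
  if (src->next)
    src->next->prev = dst;	/* Old successor now points back at DST.  */
  dst->prev = src;
  dst->next = src->next;
  src->next = dst;
}
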
4211
4212 /* Return true if CS calls DEST or its clone for all contexts.  When
4213 ALLOW_RECURSION_TO_CLONE is false, also return false for self-recursive
4214 edges from/to an all-context clone.  */
4215
4216 static bool
4217 calls_same_node_or_its_all_contexts_clone_p (cgraph_edge *cs, cgraph_node *dest,
4218 bool allow_recursion_to_clone)
4219 {
4220 enum availability availability;
4221 cgraph_node *callee = cs->callee->function_symbol (&availability);
4222
4223 if (availability <= AVAIL_INTERPOSABLE)
4224 return false;
4225 if (callee == dest)
4226 return true;
4227 if (!allow_recursion_to_clone && cs->caller == callee)
4228 return false;
4229
4230 ipa_node_params *info = ipa_node_params_sum->get (callee);
4231 return info->is_all_contexts_clone && info->ipcp_orig_node == dest;
4232 }
4233
4234 /* Return true if edge CS does bring about the value described by SRC to
4235 DEST_VAL of node DEST or its clone for all contexts. */
4236
4237 static bool
4238 cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source<tree> *src,
4239 cgraph_node *dest, ipcp_value<tree> *dest_val)
4240 {
4241 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
4242
4243 if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, !src->val)
4244 || caller_info->node_dead)
4245 return false;
4246
4247 if (!src->val)
4248 return true;
4249
4250 if (caller_info->ipcp_orig_node)
4251 {
4252 tree t;
4253 if (src->offset == -1)
4254 t = caller_info->known_csts[src->index];
4255 else
4256 t = get_clone_agg_value (cs->caller, src->offset, src->index);
4257 return (t != NULL_TREE
4258 && values_equal_for_ipcp_p (src->val->value, t));
4259 }
4260 else
4261 {
4262 if (src->val == dest_val)
4263 return true;
4264
4265 struct ipcp_agg_lattice *aglat;
4266 class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
4267 src->index);
4268 if (src->offset == -1)
4269 return (plats->itself.is_single_const ()
4270 && values_equal_for_ipcp_p (src->val->value,
4271 plats->itself.values->value));
4272 else
4273 {
4274 if (plats->aggs_bottom || plats->aggs_contain_variable)
4275 return false;
4276 for (aglat = plats->aggs; aglat; aglat = aglat->next)
4277 if (aglat->offset == src->offset)
4278 return (aglat->is_single_const ()
4279 && values_equal_for_ipcp_p (src->val->value,
4280 aglat->values->value));
4281 }
4282 return false;
4283 }
4284 }
4285
4286 /* Return true if edge CS does bring about the value described by SRC to
4287 DST_VAL of node DEST or its clone for all contexts. */
4288
4289 static bool
4290 cgraph_edge_brings_value_p (cgraph_edge *cs,
4291 ipcp_value_source<ipa_polymorphic_call_context> *src,
4292 cgraph_node *dest,
4293 ipcp_value<ipa_polymorphic_call_context> *)
4294 {
4295 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
4296
4297 if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, true)
4298 || caller_info->node_dead)
4299 return false;
4300 if (!src->val)
4301 return true;
4302
4303 if (caller_info->ipcp_orig_node)
4304 return (caller_info->known_contexts.length () > (unsigned) src->index)
4305 && values_equal_for_ipcp_p (src->val->value,
4306 caller_info->known_contexts[src->index]);
4307
4308 class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
4309 src->index);
4310 return plats->ctxlat.is_single_const ()
4311 && values_equal_for_ipcp_p (src->val->value,
4312 plats->ctxlat.values->value);
4313 }
4314
4315 /* Get the next clone in the linked list of clones of an edge. */
4316
4317 static inline struct cgraph_edge *
4318 get_next_cgraph_edge_clone (struct cgraph_edge *cs)
4319 {
4320 edge_clone_summary *s = edge_clone_summaries->get (cs);
4321 return s != NULL ? s->next_clone : NULL;
4322 }
4323
4324 /* Given VAL that is intended for DEST, iterate over all its sources and if any
4325 of them is viable and hot, return true.  In that case, for those that still
4326 hold, add their edge frequencies and their number, together with the
4327 cumulative profile counts of self-recursive and other edges, into *FREQ_SUM,
4328 *CALLER_COUNT, *REC_COUNT_SUM and *NONREC_COUNT_SUM respectively.  */
4329
4330 template <typename valtype>
4331 static bool
4332 get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
4333 sreal *freq_sum, int *caller_count,
4334 profile_count *rec_count_sum,
4335 profile_count *nonrec_count_sum)
4336 {
4337 ipcp_value_source<valtype> *src;
4338 sreal freq = 0;
4339 int count = 0;
4340 profile_count rec_cnt = profile_count::zero ();
4341 profile_count nonrec_cnt = profile_count::zero ();
4342 bool hot = false;
4343 bool non_self_recursive = false;
4344
4345 for (src = val->sources; src; src = src->next)
4346 {
4347 struct cgraph_edge *cs = src->cs;
4348 while (cs)
4349 {
4350 if (cgraph_edge_brings_value_p (cs, src, dest, val))
4351 {
4352 count++;
4353 freq += cs->sreal_frequency ();
4354 hot |= cs->maybe_hot_p ();
4355 if (cs->caller != dest)
4356 {
4357 non_self_recursive = true;
4358 if (cs->count.ipa ().initialized_p ())
4359 nonrec_cnt += cs->count.ipa ();
4360 }
4361 else if (cs->count.ipa ().initialized_p ())
4362 rec_cnt += cs->count.ipa ();
4363 }
4364 cs = get_next_cgraph_edge_clone (cs);
4365 }
4366 }
4367
4368 /* If the only edges bringing a value are self-recursive ones, do not bother
4369 evaluating it. */
4370 if (!non_self_recursive)
4371 return false;
4372
4373 *freq_sum = freq;
4374 *caller_count = count;
4375 *rec_count_sum = rec_cnt;
4376 *nonrec_count_sum = nonrec_cnt;
4377
4378 if (!hot && ipa_node_params_sum->get (dest)->node_within_scc)
4379 {
4380 struct cgraph_edge *cs;
4381
4382 /* A cold non-SCC source edge could trigger hot recursive execution of the
4383 function.  Consider the case as hot and rely on the following cost model
4384 computation to select the right clone.  */
4385 for (cs = dest->callers; cs; cs = cs->next_caller)
4386 if (cs->caller == dest && cs->maybe_hot_p ())
4387 return true;
4388 }
4389
4390 return hot;
4391 }
4392
4393 /* Given a NODE, and a set of its CALLERS, try to adjust order of the callers
4394 to let a non-self-recursive caller be the first element. Thus, we can
4395 simplify intersecting operations on values that arrive from all of these
4396 callers, especially when there exists a self-recursive call.  Return true if
4397 this kind of adjustment is possible. */
4398
4399 static bool
4400 adjust_callers_for_value_intersection (vec<cgraph_edge *> &callers,
4401 cgraph_node *node)
4402 {
4403 for (unsigned i = 0; i < callers.length (); i++)
4404 {
4405 cgraph_edge *cs = callers[i];
4406
4407 if (cs->caller != node)
4408 {
4409 if (i > 0)
4410 {
4411 callers[i] = callers[0];
4412 callers[0] = cs;
4413 }
4414 return true;
4415 }
4416 }
4417 return false;
4418 }
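
/* The reordering above in standalone form (illustration only): swap the
   first element satisfying a predicate to the front of an array, returning
   whether one was found.  Plain ints stand in for call graph edges.  */

static bool
example_move_first_match_to_front (int *elems, unsigned length,
				   bool (*pred) (int))
{
  for (unsigned i = 0; i < length; i++)
    if (pred (elems[i]))
      {
	int match = elems[i];	/* Swap; a self-swap at i == 0 is harmless.  */
	elems[i] = elems[0];
	elems[0] = match;
	return true;
      }
  return false;
}
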
4419
4420 /* Return a vector of incoming edges that do bring value VAL to node DEST. It
4421 is assumed their number is known and equal to CALLER_COUNT. */
4422
4423 template <typename valtype>
4424 static vec<cgraph_edge *>
4425 gather_edges_for_value (ipcp_value<valtype> *val, cgraph_node *dest,
4426 int caller_count)
4427 {
4428 ipcp_value_source<valtype> *src;
4429 vec<cgraph_edge *> ret;
4430
4431 ret.create (caller_count);
4432 for (src = val->sources; src; src = src->next)
4433 {
4434 struct cgraph_edge *cs = src->cs;
4435 while (cs)
4436 {
4437 if (cgraph_edge_brings_value_p (cs, src, dest, val))
4438 ret.quick_push (cs);
4439 cs = get_next_cgraph_edge_clone (cs);
4440 }
4441 }
4442
4443 if (caller_count > 1)
4444 adjust_callers_for_value_intersection (ret, dest);
4445
4446 return ret;
4447 }
4448
4449 /* Construct a replacement map for a known VALUE for a formal parameter PARM_NUM.
4450 Return it or NULL if for some reason it cannot be created. FORCE_LOAD_REF
4451 should be set to true when the reference created for the constant should be
4452 a load one and not an address one because the corresponding parameter p is
4453 only used as *p. */
4454
4455 static struct ipa_replace_map *
4456 get_replacement_map (class ipa_node_params *info, tree value, int parm_num,
4457 bool force_load_ref)
4458 {
4459 struct ipa_replace_map *replace_map;
4460
4461 replace_map = ggc_alloc<ipa_replace_map> ();
4462 if (dump_file)
4463 {
4464 fprintf (dump_file, " replacing ");
4465 ipa_dump_param (dump_file, info, parm_num);
4466
4467 fprintf (dump_file, " with const ");
4468 print_generic_expr (dump_file, value);
4469
4470 if (force_load_ref)
4471 fprintf (dump_file, " - forcing load reference\n");
4472 else
4473 fprintf (dump_file, "\n");
4474 }
4475 replace_map->parm_num = parm_num;
4476 replace_map->new_tree = value;
4477 replace_map->force_load_ref = force_load_ref;
4478 return replace_map;
4479 }
4480
4481 /* Dump new profiling counts of NODE.  SPEC is true when NODE is a specialized
4482 one, otherwise it will be referred to as the original node.  */
4483
4484 static void
4485 dump_profile_updates (cgraph_node *node, bool spec)
4486 {
4487 if (spec)
4488 fprintf (dump_file, " setting count of the specialized node %s to ",
4489 node->dump_name ());
4490 else
4491 fprintf (dump_file, " setting count of the original node %s to ",
4492 node->dump_name ());
4493
4494 node->count.dump (dump_file);
4495 fprintf (dump_file, "\n");
4496 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
4497 {
4498 fprintf (dump_file, " edge to %s has count ",
4499 cs->callee->dump_name ());
4500 cs->count.dump (dump_file);
4501 fprintf (dump_file, "\n");
4502 }
4503 }
4504
4505 /* With a partial training run we do not want to assume that the original's
4506 count is zero whenever we redirect all executed edges to the clone.  Simply
4507 drop the profile to a local one in this case.  In any case, return the new
4508 value.  ORIG_NODE is the original node and its count has not been updated yet.  */
4509
4510 profile_count
4511 lenient_count_portion_handling (profile_count remainder, cgraph_node *orig_node)
4512 {
4513 if (remainder.ipa_p () && !remainder.ipa ().nonzero_p ()
4514 && orig_node->count.ipa_p () && orig_node->count.ipa ().nonzero_p ()
4515 && opt_for_fn (orig_node->decl, flag_profile_partial_training))
4516 remainder = remainder.guessed_local ();
4517
4518 return remainder;
4519 }
4520
4521 /* Structure to sum counts coming from nodes other than the original node and
4522 its clones. */
4523
4524 struct gather_other_count_struct
4525 {
4526 cgraph_node *orig;
4527 profile_count other_count;
4528 };
4529
4530 /* Worker callback of call_for_symbol_thunks_and_aliases summing up the counts
4531 that come from non-self-recursive calls.  */
4532
4533 static bool
4534 gather_count_of_non_rec_edges (cgraph_node *node, void *data)
4535 {
4536 gather_other_count_struct *desc = (gather_other_count_struct *) data;
4537 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4538 if (cs->caller != desc->orig && cs->caller->clone_of != desc->orig)
4539 desc->other_count += cs->count.ipa ();
4540 return false;
4541 }
4542
4543 /* Structure to help analyze if we need to boost counts of some clones of some
4544 non-recursive edges to match the new callee count. */
4545
4546 struct desc_incoming_count_struct
4547 {
4548 cgraph_node *orig;
4549 hash_set <cgraph_edge *> *processed_edges;
4550 profile_count count;
4551 unsigned unproc_orig_rec_edges;
4552 };
4553
4554 /* Go over edges calling NODE and its thunks and gather information about
4555 incoming counts so that we know if we need to make any adjustments. */
4556
4557 static void
4558 analyze_clone_icoming_counts (cgraph_node *node,
4559 desc_incoming_count_struct *desc)
4560 {
4561 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4562 if (cs->caller->thunk)
4563 {
4564 analyze_clone_icoming_counts (cs->caller, desc);
4565 continue;
4566 }
4567 else
4568 {
4569 if (cs->count.initialized_p ())
4570 desc->count += cs->count.ipa ();
4571 if (!desc->processed_edges->contains (cs)
4572 && cs->caller->clone_of == desc->orig)
4573 desc->unproc_orig_rec_edges++;
4574 }
4575 }
4576
4577 /* If caller edge counts of a clone created for a self-recursive arithmetic
4578 jump function must be adjusted because they come from the "seed" clone for
4579 the first value and so have been excessively scaled back as if they were not
4580 recursive calls, adjust them so that the incoming counts of NODE match its
4581 count.  NODE is the node or its thunk.  */
4582
4583 static void
4584 adjust_clone_incoming_counts (cgraph_node *node,
4585 desc_incoming_count_struct *desc)
4586 {
4587 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4588 if (cs->caller->thunk)
4589 {
4590 adjust_clone_incoming_counts (cs->caller, desc);
4591 profile_count sum = profile_count::zero ();
4592 for (cgraph_edge *e = cs->caller->callers; e; e = e->next_caller)
4593 if (e->count.initialized_p ())
4594 sum += e->count.ipa ();
4595 cs->count = cs->count.combine_with_ipa_count (sum);
4596 }
4597 else if (!desc->processed_edges->contains (cs)
4598 && cs->caller->clone_of == desc->orig)
4599 {
4600 cs->count += desc->count;
4601 if (dump_file)
4602 {
4603 fprintf (dump_file, " Adjusted count of an incoming edge of "
4604 "a clone %s -> %s to ", cs->caller->dump_name (),
4605 cs->callee->dump_name ());
4606 cs->count.dump (dump_file);
4607 fprintf (dump_file, "\n");
4608 }
4609 }
4610 }
4611
4612 /* When ORIG_NODE has been cloned for values which have been generated for a
4613 self-recursive call as a result of arithmetic pass-through
4614 jump-functions, adjust its count together with counts of all such clones in
4615 SELF_GEN_CLONES which also at this point contains ORIG_NODE itself.
4616
4617 The function sums the counts of the original node and all its clones that
4618 cannot be attributed to a specific clone because they come from
4619 non-recursive edges.  This sum is then evenly divided between the clones and
4620 on top of that each one gets all the counts which can be attributed directly
4621 to it.  */
4622
4623 static void
4624 update_counts_for_self_gen_clones (cgraph_node *orig_node,
4625 const vec<cgraph_node *> &self_gen_clones)
4626 {
4627 profile_count redist_sum = orig_node->count.ipa ();
4628 if (!(redist_sum > profile_count::zero ()))
4629 return;
4630
4631 if (dump_file)
4632 fprintf (dump_file, " Updating profile of self recursive clone "
4633 "series\n");
4634
4635 gather_other_count_struct gocs;
4636 gocs.orig = orig_node;
4637 gocs.other_count = profile_count::zero ();
4638
4639 auto_vec <profile_count, 8> other_edges_count;
4640 for (cgraph_node *n : self_gen_clones)
4641 {
4642 gocs.other_count = profile_count::zero ();
4643 n->call_for_symbol_thunks_and_aliases (gather_count_of_non_rec_edges,
4644 &gocs, false);
4645 other_edges_count.safe_push (gocs.other_count);
4646 redist_sum -= gocs.other_count;
4647 }
4648
4649 hash_set<cgraph_edge *> processed_edges;
4650 unsigned i = 0;
4651 for (cgraph_node *n : self_gen_clones)
4652 {
4653 profile_count orig_count = n->count;
4654 profile_count new_count
4655 = (redist_sum.apply_scale (1, self_gen_clones.length ())
4656 + other_edges_count[i]);
4657 new_count = lenient_count_portion_handling (new_count, orig_node);
4658 n->count = new_count;
4659 profile_count::adjust_for_ipa_scaling (&new_count, &orig_count);
4660 for (cgraph_edge *cs = n->callees; cs; cs = cs->next_callee)
4661 {
4662 cs->count = cs->count.apply_scale (new_count, orig_count);
4663 processed_edges.add (cs);
4664 }
4665 for (cgraph_edge *cs = n->indirect_calls; cs; cs = cs->next_callee)
4666 cs->count = cs->count.apply_scale (new_count, orig_count);
4667
4668 i++;
4669 }
4670
4671 /* There are still going to be edges to ORIG_NODE that have one or more
4672 clones coming from another node clone in SELF_GEN_CLONES and which we
4673 scaled by the same amount, which means that the total incoming sum of
4674 counts to ORIG_NODE will be too high, scale such edges back. */
4675 for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
4676 {
4677 if (cs->callee->ultimate_alias_target () == orig_node)
4678 {
4679 unsigned den = 0;
4680 for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
4681 if (e->callee->ultimate_alias_target () == orig_node
4682 && processed_edges.contains (e))
4683 den++;
4684 if (den > 0)
4685 for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
4686 if (e->callee->ultimate_alias_target () == orig_node
4687 && processed_edges.contains (e))
4688 e->count = e->count.apply_scale (1, den);
4689 }
4690 }
4691
4692 /* Edges from the seeds of the values generated for arithmetic jump-functions
4693 along self-recursive edges are likely to have fairly low counts and so
4694 edges from them to nodes in the self_gen_clones do not correspond to the
4695 artificially distributed counts of those nodes; the total sum of incoming
4696 edges to some clones might therefore be too low.  Detect this situation and
4697 correct it.  */
4698 for (cgraph_node *n : self_gen_clones)
4699 {
4700 if (!(n->count.ipa () > profile_count::zero ()))
4701 continue;
4702
4703 desc_incoming_count_struct desc;
4704 desc.orig = orig_node;
4705 desc.processed_edges = &processed_edges;
4706 desc.count = profile_count::zero ();
4707 desc.unproc_orig_rec_edges = 0;
4708 analyze_clone_icoming_counts (n, &desc);
4709
4710 if (n->count.differs_from_p (desc.count))
4711 {
4712 if (n->count > desc.count
4713 && desc.unproc_orig_rec_edges > 0)
4714 {
4715 desc.count = n->count - desc.count;
4716 desc.count
4717 = desc.count.apply_scale (1, desc.unproc_orig_rec_edges);
4718 adjust_clone_incoming_counts (n, &desc);
4719 }
4720 else if (dump_file)
4721 fprintf (dump_file,
4722 " Unable to fix up incoming counts for %s.\n",
4723 n->dump_name ());
4724 }
4725 }
4726
4727 if (dump_file)
4728 for (cgraph_node *n : self_gen_clones)
4729 dump_profile_updates (n, n != orig_node);
4730 return;
4731 }
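
/* The division scheme above in miniature (illustration only, plain longs
   instead of profile_count): every clone in the series receives an even
   share of the redistributable sum plus whatever counts can be attributed
   directly to it.  */

static long
example_self_gen_clone_count (long redist_sum, unsigned n_clones,
			      long directly_attributable)
{
  return redist_sum / n_clones + directly_attributable;
}
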
4732
4733 /* After a specialized NEW_NODE version of ORIG_NODE has been created, update
4734 their profile information to reflect this. This function should not be used
4735 for clones generated for arithmetic pass-through jump functions on a
4736 self-recursive call graph edge, that situation is handled by
4737 update_counts_for_self_gen_clones. */
4738
4739 static void
4740 update_profiling_info (struct cgraph_node *orig_node,
4741 struct cgraph_node *new_node)
4742 {
4743 struct caller_statistics stats;
4744 profile_count new_sum;
4745 profile_count remainder, orig_node_count = orig_node->count.ipa ();
4746
4747 if (!(orig_node_count > profile_count::zero ()))
4748 return;
4749
4750 if (dump_file)
4751 {
4752 fprintf (dump_file, " Updating profile from original count: ");
4753 orig_node_count.dump (dump_file);
4754 fprintf (dump_file, "\n");
4755 }
4756
4757 init_caller_stats (&stats, new_node);
4758 new_node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
4759 false);
4760 new_sum = stats.count_sum;
4761
4762 if (new_sum > orig_node_count)
4763 {
4764 /* TODO: Perhaps this should be gcc_unreachable ()? */
4765 remainder = profile_count::zero ().guessed_local ();
4766 }
4767 else if (stats.rec_count_sum.nonzero_p ())
4768 {
4769 int new_nonrec_calls = stats.n_nonrec_calls;
4770 /* There are self-recursive edges which are likely to bring in the
4771 majority of calls but which we must divide in between the original and
4772 new node. */
4773 init_caller_stats (&stats, orig_node);
4774 orig_node->call_for_symbol_thunks_and_aliases (gather_caller_stats,
4775 &stats, false);
4776 int orig_nonrec_calls = stats.n_nonrec_calls;
4777 profile_count orig_nonrec_call_count = stats.count_sum;
4778
4779 if (orig_node->local)
4780 {
4781 if (!orig_nonrec_call_count.nonzero_p ())
4782 {
4783 if (dump_file)
4784 fprintf (dump_file, " The original is local and the only "
4785 "incoming edges from non-dead callers with nonzero "
4786 "counts are self-recursive, assuming it is cold.\n");
4787 /* The NEW_NODE count and counts of all its outgoing edges
4788 are still unmodified copies of ORIG_NODE's. Just clear
4789 the latter and bail out. */
4790 profile_count zero;
4791 if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
4792 zero = profile_count::zero ().guessed_local ();
4793 else
4794 zero = profile_count::adjusted_zero ();
4795 orig_node->count = zero;
4796 for (cgraph_edge *cs = orig_node->callees;
4797 cs;
4798 cs = cs->next_callee)
4799 cs->count = zero;
4800 for (cgraph_edge *cs = orig_node->indirect_calls;
4801 cs;
4802 cs = cs->next_callee)
4803 cs->count = zero;
4804 return;
4805 }
4806 }
4807 else
4808 {
4809 /* Let's behave as if there was another caller that accounts for all
4810 the calls that were either indirect or from other compilation
4811 units. */
4812 orig_nonrec_calls++;
4813 profile_count pretend_caller_count
4814 = (orig_node_count - new_sum - orig_nonrec_call_count
4815 - stats.rec_count_sum);
4816 orig_nonrec_call_count += pretend_caller_count;
4817 }
4818
4819 /* Divide all "unexplained" counts roughly proportionally to sums of
4820 counts of non-recursive calls.
4821
4822 We put rather arbitrary limits on how many counts we claim because the
4823 sum of non-self-recursive incoming counts is only a rough guideline and
4824 there are cases (such as mcf) where using it blindly just takes too
4825 many.  And if lattices are considered in the opposite order we could
4826 also take too few.  */
4827 profile_count unexp = orig_node_count - new_sum - orig_nonrec_call_count;
4828
4829 int limit_den = 2 * (orig_nonrec_calls + new_nonrec_calls);
4830 profile_count new_part
4831 = MAX(MIN (unexp.apply_scale (new_sum,
4832 new_sum + orig_nonrec_call_count),
4833 unexp.apply_scale (limit_den - 1, limit_den)),
4834 unexp.apply_scale (new_nonrec_calls, limit_den));
4835 if (dump_file)
4836 {
4837 fprintf (dump_file, " Claiming ");
4838 new_part.dump (dump_file);
4839 fprintf (dump_file, " of unexplained ");
4840 unexp.dump (dump_file);
4841 fprintf (dump_file, " counts because of self-recursive "
4842 "calls\n");
4843 }
4844 new_sum += new_part;
4845 remainder = lenient_count_portion_handling (orig_node_count - new_sum,
4846 orig_node);
4847 }
4848 else
4849 remainder = lenient_count_portion_handling (orig_node_count - new_sum,
4850 orig_node);
4851
4852 new_sum = orig_node_count.combine_with_ipa_count (new_sum);
4853 new_node->count = new_sum;
4854 orig_node->count = remainder;
4855
4856 profile_count orig_new_node_count = orig_node_count;
4857 profile_count::adjust_for_ipa_scaling (&new_sum, &orig_new_node_count);
4858 for (cgraph_edge *cs = new_node->callees; cs; cs = cs->next_callee)
4859 cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
4860 for (cgraph_edge *cs = new_node->indirect_calls; cs; cs = cs->next_callee)
4861 cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
4862
4863 profile_count::adjust_for_ipa_scaling (&remainder, &orig_node_count);
4864 for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
4865 cs->count = cs->count.apply_scale (remainder, orig_node_count);
4866 for (cgraph_edge *cs = orig_node->indirect_calls; cs; cs = cs->next_callee)
4867 cs->count = cs->count.apply_scale (remainder, orig_node_count);
4868
4869 if (dump_file)
4870 {
4871 dump_profile_updates (new_node, true);
4872 dump_profile_updates (orig_node, false);
4873 }
4874 }
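
/* The clamped proportional split above, modeled with plain longs
   (illustration only, profile_count in the real code): the new node claims
   a share of the unexplained count proportional to NEW_SUM, but never more
   than (limit_den - 1)/limit_den nor less than new_nonrec_calls/limit_den
   of it.  */

static long
example_claim_unexplained (long unexp, long new_sum,
			   long orig_nonrec_call_count,
			   int orig_nonrec_calls, int new_nonrec_calls)
{
  int limit_den = 2 * (orig_nonrec_calls + new_nonrec_calls);
  long proportional = unexp * new_sum / (new_sum + orig_nonrec_call_count);
  long upper = unexp * (limit_den - 1) / limit_den;
  long lower = unexp * new_nonrec_calls / limit_den;
  long claimed = proportional < upper ? proportional : upper;	/* MIN */
  return claimed > lower ? claimed : lower;			/* MAX */
}
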
4875
4876 /* Update the respective profile of specialized NEW_NODE and the original
4877 ORIG_NODE after additional edges with cumulative count sum REDIRECTED_SUM
4878 have been redirected to the specialized version. */
4879
4880 static void
4881 update_specialized_profile (struct cgraph_node *new_node,
4882 struct cgraph_node *orig_node,
4883 profile_count redirected_sum)
4884 {
4885 struct cgraph_edge *cs;
4886 profile_count new_node_count, orig_node_count = orig_node->count.ipa ();
4887
4888 if (dump_file)
4889 {
4890 fprintf (dump_file, " the sum of counts of redirected edges is ");
4891 redirected_sum.dump (dump_file);
4892 fprintf (dump_file, "\n old ipa count of the original node is ");
4893 orig_node_count.dump (dump_file);
4894 fprintf (dump_file, "\n");
4895 }
4896 if (!(orig_node_count > profile_count::zero ()))
4897 return;
4898
4899 new_node_count = new_node->count;
4900 new_node->count += redirected_sum;
4901 orig_node->count
4902 = lenient_count_portion_handling (orig_node->count - redirected_sum,
4903 orig_node);
4904
4905 for (cs = new_node->callees; cs; cs = cs->next_callee)
4906 cs->count += cs->count.apply_scale (redirected_sum, new_node_count);
4907
4908 for (cs = orig_node->callees; cs; cs = cs->next_callee)
4909 {
4910 profile_count dec = cs->count.apply_scale (redirected_sum,
4911 orig_node_count);
4912 cs->count -= dec;
4913 }
4914
4915 if (dump_file)
4916 {
4917 dump_profile_updates (new_node, true);
4918 dump_profile_updates (orig_node, false);
4919 }
4920 }
4921
4922 static void adjust_references_in_caller (cgraph_edge *cs,
4923 symtab_node *symbol, int index);
4924
4925 /* Simple structure to pass a symbol and index (with same meaning as parameters
4926 of adjust_references_in_caller) through a void* parameter of a
4927 call_for_symbol_thunks_and_aliases callback. */
4928 struct symbol_and_index_together
4929 {
4930 symtab_node *symbol;
4931 int index;
4932 };
4933
4934 /* Worker callback of call_for_symbol_thunks_and_aliases to recursively call
4935 adjust_references_in_caller on edges up in the call-graph, if necessary. */
4936 static bool
4937 adjust_refs_in_act_callers (struct cgraph_node *node, void *data)
4938 {
4939 symbol_and_index_together *pack = (symbol_and_index_together *) data;
4940 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4941 if (!cs->caller->thunk)
4942 adjust_references_in_caller (cs, pack->symbol, pack->index);
4943 return false;
4944 }
4945
4946 /* At INDEX of a function being called by CS there is an ADDR_EXPR of a
4947 variable which is only dereferenced and which is represented by SYMBOL.  See
4948 if we can remove the ADDR reference in callers associated with the call.  */
4949
4950 static void
4951 adjust_references_in_caller (cgraph_edge *cs, symtab_node *symbol, int index)
4952 {
4953 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
4954 ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, index);
4955 if (jfunc->type == IPA_JF_CONST)
4956 {
4957 ipa_ref *to_del = cs->caller->find_reference (symbol, cs->call_stmt,
4958 cs->lto_stmt_uid,
4959 IPA_REF_ADDR);
4960 if (!to_del)
4961 return;
4962 to_del->remove_reference ();
4963 ipa_zap_jf_refdesc (jfunc);
4964 if (dump_file)
4965 fprintf (dump_file, " Removed a reference from %s to %s.\n",
4966 cs->caller->dump_name (), symbol->dump_name ());
4967 return;
4968 }
4969
4970 if (jfunc->type != IPA_JF_PASS_THROUGH
4971 || ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR
4972 || ipa_get_jf_pass_through_refdesc_decremented (jfunc))
4973 return;
4974
4975 int fidx = ipa_get_jf_pass_through_formal_id (jfunc);
4976 cgraph_node *caller = cs->caller;
4977 ipa_node_params *caller_info = ipa_node_params_sum->get (caller);
4978 /* TODO: This consistency check may be too big and not really
4979 that useful. Consider removing it. */
4980 tree cst;
4981 if (caller_info->ipcp_orig_node)
4982 cst = caller_info->known_csts[fidx];
4983 else
4984 {
4985 ipcp_lattice<tree> *lat = ipa_get_scalar_lat (caller_info, fidx);
4986 gcc_assert (lat->is_single_const ());
4987 cst = lat->values->value;
4988 }
4989 gcc_assert (TREE_CODE (cst) == ADDR_EXPR
4990 && (symtab_node::get (get_base_address (TREE_OPERAND (cst, 0)))
4991 == symbol));
4992
4993 int cuses = ipa_get_controlled_uses (caller_info, fidx);
4994 if (cuses == IPA_UNDESCRIBED_USE)
4995 return;
4996 gcc_assert (cuses > 0);
4997 cuses--;
4998 ipa_set_controlled_uses (caller_info, fidx, cuses);
4999 ipa_set_jf_pass_through_refdesc_decremented (jfunc, true);
5000 if (dump_file && (dump_flags & TDF_DETAILS))
5001 fprintf (dump_file, " Controlled uses of parameter %i of %s dropped "
5002 "to %i.\n", fidx, caller->dump_name (), cuses);
5003 if (cuses)
5004 return;
5005
5006 if (caller_info->ipcp_orig_node)
5007 {
5008 	  /* Cloning machinery has created a reference here; we need to either
5009 remove it or change it to a read one. */
5010 ipa_ref *to_del = caller->find_reference (symbol, NULL, 0, IPA_REF_ADDR);
5011 if (to_del)
5012 {
5013 to_del->remove_reference ();
5014 if (dump_file)
5015 fprintf (dump_file, " Removed a reference from %s to %s.\n",
5016 cs->caller->dump_name (), symbol->dump_name ());
5017 if (ipa_get_param_load_dereferenced (caller_info, fidx))
5018 {
5019 caller->create_reference (symbol, IPA_REF_LOAD, NULL);
5020 if (dump_file)
5021 fprintf (dump_file,
5022 " ...and replaced it with LOAD one.\n");
5023 }
5024 }
5025 }
5026
5027 symbol_and_index_together pack;
5028 pack.symbol = symbol;
5029 pack.index = fidx;
5030 if (caller->can_change_signature)
5031 caller->call_for_symbol_thunks_and_aliases (adjust_refs_in_act_callers,
5032 &pack, true);
5033 }
5034
5035
5036 /* Return true if we would like to remove a parameter from NODE when cloning it
5037 with KNOWN_CSTS scalar constants. */
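
/* For example (an illustrative sketch, not derived from real input):

     int f (int a, int b) { return a + b; }

   If every caller in the considered subset passes the constant 7 for A, a
   clone of f no longer needs that parameter at all, conceptually becoming

     int f.constprop (int b) { return 7 + b; }

   provided that the signature of f is allowed to change.  */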
5038
5039 static bool
5040 want_remove_some_param_p (cgraph_node *node, vec<tree> known_csts)
5041 {
5042 auto_vec<bool, 16> surviving;
5043 bool filled_vec = false;
5044 ipa_node_params *info = ipa_node_params_sum->get (node);
5045 int i, count = ipa_get_param_count (info);
5046
5047 for (i = 0; i < count; i++)
5048 {
5049 if (!known_csts[i] && ipa_is_param_used (info, i))
5050 continue;
5051
5052 if (!filled_vec)
5053 {
5054 clone_info *info = clone_info::get (node);
5055 if (!info || !info->param_adjustments)
5056 return true;
5057 info->param_adjustments->get_surviving_params (&surviving);
5058 filled_vec = true;
5059 }
5060       if (surviving.length () > (unsigned) i && surviving[i])
5061 return true;
5062 }
5063 return false;
5064 }
5065
5066 /* Create a specialized version of NODE with known constants in KNOWN_CSTS,
5067 known contexts in KNOWN_CONTEXTS and known aggregate values in AGGVALS and
5068 redirect all edges in CALLERS to it. */
5069
5070 static struct cgraph_node *
5071 create_specialized_node (struct cgraph_node *node,
5072 vec<tree> known_csts,
5073 vec<ipa_polymorphic_call_context> known_contexts,
5074 struct ipa_agg_replacement_value *aggvals,
5075 vec<cgraph_edge *> &callers)
5076 {
5077 ipa_node_params *new_info, *info = ipa_node_params_sum->get (node);
5078 vec<ipa_replace_map *, va_gc> *replace_trees = NULL;
5079 vec<ipa_adjusted_param, va_gc> *new_params = NULL;
5080 struct ipa_agg_replacement_value *av;
5081 struct cgraph_node *new_node;
5082 int i, count = ipa_get_param_count (info);
5083 clone_info *cinfo = clone_info::get (node);
5084 ipa_param_adjustments *old_adjustments = cinfo
5085 ? cinfo->param_adjustments : NULL;
5086 ipa_param_adjustments *new_adjustments;
5087 gcc_assert (!info->ipcp_orig_node);
5088 gcc_assert (node->can_change_signature
5089 || !old_adjustments);
5090
5091 if (old_adjustments)
5092 {
5093 /* At the moment all IPA optimizations should use the number of
5094 parameters of the prevailing decl as the m_always_copy_start.
5095 Handling any other value would complicate the code below, so for the
5096 	 time being let's only assert it is so.  */
5097 gcc_assert (old_adjustments->m_always_copy_start == count
5098 || old_adjustments->m_always_copy_start < 0);
5099 int old_adj_count = vec_safe_length (old_adjustments->m_adj_params);
5100 for (i = 0; i < old_adj_count; i++)
5101 {
5102 ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
5103 if (!node->can_change_signature
5104 || old_adj->op != IPA_PARAM_OP_COPY
5105 || (!known_csts[old_adj->base_index]
5106 && ipa_is_param_used (info, old_adj->base_index)))
5107 {
5108 ipa_adjusted_param new_adj = *old_adj;
5109
5110 new_adj.prev_clone_adjustment = true;
5111 new_adj.prev_clone_index = i;
5112 vec_safe_push (new_params, new_adj);
5113 }
5114 }
5115 bool skip_return = old_adjustments->m_skip_return;
5116 new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
5117 ipa_param_adjustments (new_params, count,
5118 skip_return));
5119 }
5120 else if (node->can_change_signature
5121 && want_remove_some_param_p (node, known_csts))
5122 {
5123 ipa_adjusted_param adj;
5124 memset (&adj, 0, sizeof (adj));
5125 adj.op = IPA_PARAM_OP_COPY;
5126 for (i = 0; i < count; i++)
5127 if (!known_csts[i] && ipa_is_param_used (info, i))
5128 {
5129 adj.base_index = i;
5130 adj.prev_clone_index = i;
5131 vec_safe_push (new_params, adj);
5132 }
5133 new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
5134 ipa_param_adjustments (new_params, count, false));
5135 }
5136 else
5137 new_adjustments = NULL;
5138
5139 auto_vec<cgraph_edge *, 2> self_recursive_calls;
5140 for (i = callers.length () - 1; i >= 0; i--)
5141 {
5142 cgraph_edge *cs = callers[i];
5143 if (cs->caller == node)
5144 {
5145 self_recursive_calls.safe_push (cs);
5146 callers.unordered_remove (i);
5147 }
5148 }
5149 replace_trees = cinfo ? vec_safe_copy (cinfo->tree_map) : NULL;
5150 for (i = 0; i < count; i++)
5151 {
5152 tree t = known_csts[i];
5153 if (!t)
5154 continue;
5155
5156 gcc_checking_assert (TREE_CODE (t) != TREE_BINFO);
5157
5158 bool load_ref = false;
5159 symtab_node *ref_symbol;
5160 if (TREE_CODE (t) == ADDR_EXPR)
5161 {
5162 tree base = get_base_address (TREE_OPERAND (t, 0));
5163 if (TREE_CODE (base) == VAR_DECL
5164 && ipa_get_controlled_uses (info, i) == 0
5165 && ipa_get_param_load_dereferenced (info, i)
5166 && (ref_symbol = symtab_node::get (base)))
5167 {
5168 load_ref = true;
5169 if (node->can_change_signature)
5170 for (cgraph_edge *caller : callers)
5171 adjust_references_in_caller (caller, ref_symbol, i);
5172 }
5173 }
5174
5175 ipa_replace_map *replace_map = get_replacement_map (info, t, i, load_ref);
5176 if (replace_map)
5177 vec_safe_push (replace_trees, replace_map);
5178 }
5179
5180 unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
5181 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
5182 node->decl)));
5183 new_node = node->create_virtual_clone (callers, replace_trees,
5184 new_adjustments, "constprop",
5185 suffix_counter);
5186 suffix_counter++;
5187
5188 bool have_self_recursive_calls = !self_recursive_calls.is_empty ();
5189 for (unsigned j = 0; j < self_recursive_calls.length (); j++)
5190 {
5191 cgraph_edge *cs = get_next_cgraph_edge_clone (self_recursive_calls[j]);
5192 /* Cloned edges can disappear during cloning as speculation can be
5193 	 resolved; check that we have one and that it comes from the last
5194 cloning. */
5195 if (cs && cs->caller == new_node)
5196 cs->redirect_callee_duplicating_thunks (new_node);
5197 /* Any future code that would make more than one clone of an outgoing
5198 edge would confuse this mechanism, so let's check that does not
5199 happen. */
5200 gcc_checking_assert (!cs
5201 || !get_next_cgraph_edge_clone (cs)
5202 || get_next_cgraph_edge_clone (cs)->caller != new_node);
5203 }
5204 if (have_self_recursive_calls)
5205 new_node->expand_all_artificial_thunks ();
5206
5207 ipa_set_node_agg_value_chain (new_node, aggvals);
5208 for (av = aggvals; av; av = av->next)
5209 new_node->maybe_create_reference (av->value, NULL);
5210
5211 if (dump_file && (dump_flags & TDF_DETAILS))
5212 {
5213 fprintf (dump_file, " the new node is %s.\n", new_node->dump_name ());
5214 if (known_contexts.exists ())
5215 {
5216 for (i = 0; i < count; i++)
5217 if (!known_contexts[i].useless_p ())
5218 {
5219 fprintf (dump_file, " known ctx %i is ", i);
5220 known_contexts[i].dump (dump_file);
5221 }
5222 }
5223 if (aggvals)
5224 ipa_dump_agg_replacement_values (dump_file, aggvals);
5225 }
5226
5227 new_info = ipa_node_params_sum->get (new_node);
5228 new_info->ipcp_orig_node = node;
5229 new_node->ipcp_clone = true;
5230 new_info->known_csts = known_csts;
5231 new_info->known_contexts = known_contexts;
5232
5233 ipcp_discover_new_direct_edges (new_node, known_csts, known_contexts, aggvals);
5234
5235 return new_node;
5236 }
5237
5238 /* Return true if JFUNC, which describes the i-th parameter of call CS, is a
5239 pass-through function to itself when the cgraph_node involved is not an
5240 IPA-CP clone. When SIMPLE is true, further check if JFUNC is a simple
5241 no-operation pass-through. */
5242
5243 static bool
5244 self_recursive_pass_through_p (cgraph_edge *cs, ipa_jump_func *jfunc, int i,
5245 bool simple = true)
5246 {
5247 enum availability availability;
5248 if (cs->caller == cs->callee->function_symbol (&availability)
5249 && availability > AVAIL_INTERPOSABLE
5250 && jfunc->type == IPA_JF_PASS_THROUGH
5251 && (!simple || ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
5252 && ipa_get_jf_pass_through_formal_id (jfunc) == i
5253 && ipa_node_params_sum->get (cs->caller)
5254 && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
5255 return true;
5256 return false;
5257 }
5258
5259 /* Return true if JFUNC, which describes a part of an aggregate represented or
5260 pointed to by the i-th parameter of call CS, is a pass-through function to
5261    itself when the cgraph_node involved is not an IPA-CP clone.  When
5262 SIMPLE is true, further check if JFUNC is a simple no-operation
5263 pass-through. */
5264
5265 static bool
5266 self_recursive_agg_pass_through_p (cgraph_edge *cs, ipa_agg_jf_item *jfunc,
5267 int i, bool simple = true)
5268 {
5269 enum availability availability;
5270 if (cs->caller == cs->callee->function_symbol (&availability)
5271 && availability > AVAIL_INTERPOSABLE
5272 && jfunc->jftype == IPA_JF_LOAD_AGG
5273 && jfunc->offset == jfunc->value.load_agg.offset
5274 && (!simple || jfunc->value.pass_through.operation == NOP_EXPR)
5275 && jfunc->value.pass_through.formal_id == i
5276 && useless_type_conversion_p (jfunc->value.load_agg.type, jfunc->type)
5277 && ipa_node_params_sum->get (cs->caller)
5278 && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
5279 return true;
5280 return false;
5281 }
5282
5283 /* Given a NODE, and a subset of its CALLERS, try to populate blank slots in
5284 KNOWN_CSTS with constants that are also known for all of the CALLERS. */
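
/* For example (hypothetical callers, for illustration only):

     void f (int a) { ... }
     void g (void) { f (4); }
     void h (void) { f (4); }
     void k (void) { f (n); }

   The constant 4 cannot be propagated into f in all contexts because of k,
   but when a clone of f is created just for the subset {g, h}, this function
   can fill in 4 for parameter A.  */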
5285
5286 static void
5287 find_more_scalar_values_for_callers_subset (struct cgraph_node *node,
5288 vec<tree> &known_csts,
5289 const vec<cgraph_edge *> &callers)
5290 {
5291 ipa_node_params *info = ipa_node_params_sum->get (node);
5292 int i, count = ipa_get_param_count (info);
5293
5294 for (i = 0; i < count; i++)
5295 {
5296 struct cgraph_edge *cs;
5297 tree newval = NULL_TREE;
5298 int j;
5299 bool first = true;
5300 tree type = ipa_get_type (info, i);
5301
5302 if (ipa_get_scalar_lat (info, i)->bottom || known_csts[i])
5303 continue;
5304
5305 FOR_EACH_VEC_ELT (callers, j, cs)
5306 {
5307 struct ipa_jump_func *jump_func;
5308 tree t;
5309
5310 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5311 if (!args
5312 || i >= ipa_get_cs_argument_count (args)
5313 || (i == 0
5314 && call_passes_through_thunk (cs)))
5315 {
5316 newval = NULL_TREE;
5317 break;
5318 }
5319 jump_func = ipa_get_ith_jump_func (args, i);
5320
5321 /* Besides simple pass-through jump function, arithmetic jump
5322 function could also introduce argument-direct-pass-through for
5323 self-feeding recursive call. For example,
5324
5325 fn (int i)
5326 {
5327 fn (i & 1);
5328 }
5329
5330 Given that i is 0, recursive propagation via (i & 1) also gets
5331 0. */
5332 if (self_recursive_pass_through_p (cs, jump_func, i, false))
5333 {
5334 gcc_assert (newval);
5335 t = ipa_get_jf_arith_result (
5336 ipa_get_jf_pass_through_operation (jump_func),
5337 newval,
5338 ipa_get_jf_pass_through_operand (jump_func),
5339 type);
5340 }
5341 else
5342 t = ipa_value_from_jfunc (ipa_node_params_sum->get (cs->caller),
5343 jump_func, type);
5344 if (!t
5345 || (newval
5346 && !values_equal_for_ipcp_p (t, newval))
5347 || (!first && !newval))
5348 {
5349 newval = NULL_TREE;
5350 break;
5351 }
5352 else
5353 newval = t;
5354 first = false;
5355 }
5356
5357 if (newval)
5358 {
5359 if (dump_file && (dump_flags & TDF_DETAILS))
5360 {
5361 fprintf (dump_file, " adding an extra known scalar value ");
5362 print_ipcp_constant_value (dump_file, newval);
5363 fprintf (dump_file, " for ");
5364 ipa_dump_param (dump_file, info, i);
5365 fprintf (dump_file, "\n");
5366 }
5367
5368 known_csts[i] = newval;
5369 }
5370 }
5371 }
5372
5373 /* Given a NODE and a subset of its CALLERS, try to populate blank slots in
5374 KNOWN_CONTEXTS with polymorphic contexts that are also known for all of the
5375 CALLERS. */
5376
5377 static void
5378 find_more_contexts_for_caller_subset (cgraph_node *node,
5379 vec<ipa_polymorphic_call_context>
5380 *known_contexts,
5381 const vec<cgraph_edge *> &callers)
5382 {
5383 ipa_node_params *info = ipa_node_params_sum->get (node);
5384 int i, count = ipa_get_param_count (info);
5385
5386 for (i = 0; i < count; i++)
5387 {
5388 cgraph_edge *cs;
5389
5390 if (ipa_get_poly_ctx_lat (info, i)->bottom
5391 || (known_contexts->exists ()
5392 && !(*known_contexts)[i].useless_p ()))
5393 continue;
5394
5395 ipa_polymorphic_call_context newval;
5396 bool first = true;
5397 int j;
5398
5399 FOR_EACH_VEC_ELT (callers, j, cs)
5400 {
5401 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5402 if (!args
5403 || i >= ipa_get_cs_argument_count (args))
5404 return;
5405 ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
5406 ipa_polymorphic_call_context ctx;
5407 ctx = ipa_context_from_jfunc (ipa_node_params_sum->get (cs->caller),
5408 cs, i, jfunc);
5409 if (first)
5410 {
5411 newval = ctx;
5412 first = false;
5413 }
5414 else
5415 newval.meet_with (ctx);
5416 if (newval.useless_p ())
5417 break;
5418 }
5419
5420 if (!newval.useless_p ())
5421 {
5422 if (dump_file && (dump_flags & TDF_DETAILS))
5423 {
5424 fprintf (dump_file, " adding an extra known polymorphic "
5425 "context ");
5426 print_ipcp_constant_value (dump_file, newval);
5427 fprintf (dump_file, " for ");
5428 ipa_dump_param (dump_file, info, i);
5429 fprintf (dump_file, "\n");
5430 }
5431
5432 if (!known_contexts->exists ())
5433 known_contexts->safe_grow_cleared (ipa_get_param_count (info),
5434 true);
5435 (*known_contexts)[i] = newval;
5436 }
5437
5438 }
5439 }
5440
5441 /* Go through PLATS and create a vector of values consisting of values and
5442 offsets (minus OFFSET) of lattices that contain only a single value. */
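
/* For instance (made-up numbers): single-value lattice entries at offsets 32
   and 64, combined with an OFFSET of 32, yield a vector holding the same two
   values at offsets 0 and 32 respectively.  */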
5443
5444 static vec<ipa_agg_value>
5445 copy_plats_to_inter (class ipcp_param_lattices *plats, HOST_WIDE_INT offset)
5446 {
5447 vec<ipa_agg_value> res = vNULL;
5448
5449 if (!plats->aggs || plats->aggs_contain_variable || plats->aggs_bottom)
5450 return vNULL;
5451
5452 for (struct ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
5453 if (aglat->is_single_const ())
5454 {
5455 struct ipa_agg_value ti;
5456 ti.offset = aglat->offset - offset;
5457 ti.value = aglat->values->value;
5458 res.safe_push (ti);
5459 }
5460 return res;
5461 }
5462
5463 /* Intersect all values in INTER with single value lattices in PLATS (while
5464 subtracting OFFSET). */
5465
5466 static void
5467 intersect_with_plats (class ipcp_param_lattices *plats,
5468 vec<ipa_agg_value> *inter,
5469 HOST_WIDE_INT offset)
5470 {
5471 struct ipcp_agg_lattice *aglat;
5472 struct ipa_agg_value *item;
5473 int k;
5474
5475 if (!plats->aggs || plats->aggs_contain_variable || plats->aggs_bottom)
5476 {
5477 inter->release ();
5478 return;
5479 }
5480
5481 aglat = plats->aggs;
5482 FOR_EACH_VEC_ELT (*inter, k, item)
5483 {
5484 bool found = false;
5485 if (!item->value)
5486 continue;
5487 while (aglat)
5488 {
5489 if (aglat->offset - offset > item->offset)
5490 break;
5491 if (aglat->offset - offset == item->offset)
5492 {
5493 if (aglat->is_single_const ())
5494 {
5495 tree value = aglat->values->value;
5496
5497 if (values_equal_for_ipcp_p (item->value, value))
5498 found = true;
5499 }
5500 break;
5501 }
5502 aglat = aglat->next;
5503 }
5504 if (!found)
5505 item->value = NULL_TREE;
5506 }
5507 }
5508
5509 /* Copy aggregate replacement values of NODE (which is an IPA-CP clone) to the
5510 vector result while subtracting OFFSET from the individual value offsets. */
5511
5512 static vec<ipa_agg_value>
5513 agg_replacements_to_vector (struct cgraph_node *node, int index,
5514 HOST_WIDE_INT offset)
5515 {
5516 struct ipa_agg_replacement_value *av;
5517 vec<ipa_agg_value> res = vNULL;
5518
5519 for (av = ipa_get_agg_replacements_for_node (node); av; av = av->next)
5520 if (av->index == index
5521 && (av->offset - offset) >= 0)
5522 {
5523 struct ipa_agg_value item;
5524 gcc_checking_assert (av->value);
5525 item.offset = av->offset - offset;
5526 item.value = av->value;
5527 res.safe_push (item);
5528 }
5529
5530 return res;
5531 }
5532
5533 /* Intersect all values in INTER with those that we have already scheduled to
5534 be replaced in parameter number INDEX of NODE, which is an IPA-CP clone
5535 (while subtracting OFFSET). */
5536
5537 static void
5538 intersect_with_agg_replacements (struct cgraph_node *node, int index,
5539 vec<ipa_agg_value> *inter,
5540 HOST_WIDE_INT offset)
5541 {
5542 struct ipa_agg_replacement_value *srcvals;
5543 struct ipa_agg_value *item;
5544 int i;
5545
5546 srcvals = ipa_get_agg_replacements_for_node (node);
5547 if (!srcvals)
5548 {
5549 inter->release ();
5550 return;
5551 }
5552
5553 FOR_EACH_VEC_ELT (*inter, i, item)
5554 {
5555 struct ipa_agg_replacement_value *av;
5556 bool found = false;
5557 if (!item->value)
5558 continue;
5559 for (av = srcvals; av; av = av->next)
5560 {
5561 gcc_checking_assert (av->value);
5562 if (av->index == index
5563 && av->offset - offset == item->offset)
5564 {
5565 if (values_equal_for_ipcp_p (item->value, av->value))
5566 found = true;
5567 break;
5568 }
5569 }
5570 if (!found)
5571 item->value = NULL_TREE;
5572 }
5573 }
5574
5575 /* Intersect values in INTER with aggregate values that come along edge CS to
5576 parameter number INDEX and return it. If INTER does not actually exist yet,
5577 copy all incoming values to it. If we determine we ended up with no values
5578 whatsoever, return a released vector. */
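
/* A small worked example (with invented values): if INTER currently holds
   {offset 0: 1, offset 32: 7} and the values arriving through CS are
   {offset 0: 1, offset 32: 9}, the item at offset 32 is cleared and only the
   item at offset 0 survives the intersection.  */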
5579
5580 static vec<ipa_agg_value>
5581 intersect_aggregates_with_edge (struct cgraph_edge *cs, int index,
5582 vec<ipa_agg_value> inter)
5583 {
5584 struct ipa_jump_func *jfunc;
5585 jfunc = ipa_get_ith_jump_func (ipa_edge_args_sum->get (cs), index);
5586 if (jfunc->type == IPA_JF_PASS_THROUGH
5587 && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
5588 {
5589 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
5590 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
5591
5592 if (caller_info->ipcp_orig_node)
5593 {
5594 struct cgraph_node *orig_node = caller_info->ipcp_orig_node;
5595 class ipcp_param_lattices *orig_plats;
5596 ipa_node_params *orig_info = ipa_node_params_sum->get (orig_node);
5597 orig_plats = ipa_get_parm_lattices (orig_info, src_idx);
5598 if (agg_pass_through_permissible_p (orig_plats, jfunc))
5599 {
5600 if (!inter.exists ())
5601 inter = agg_replacements_to_vector (cs->caller, src_idx, 0);
5602 else
5603 intersect_with_agg_replacements (cs->caller, src_idx,
5604 &inter, 0);
5605 return inter;
5606 }
5607 }
5608 else
5609 {
5610 class ipcp_param_lattices *src_plats;
5611 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
5612 if (agg_pass_through_permissible_p (src_plats, jfunc))
5613 {
5614 /* Currently we do not produce clobber aggregate jump
5615 functions, adjust when we do. */
5616 gcc_checking_assert (!jfunc->agg.items);
5617 if (!inter.exists ())
5618 inter = copy_plats_to_inter (src_plats, 0);
5619 else
5620 intersect_with_plats (src_plats, &inter, 0);
5621 return inter;
5622 }
5623 }
5624 }
5625 else if (jfunc->type == IPA_JF_ANCESTOR
5626 && ipa_get_jf_ancestor_agg_preserved (jfunc))
5627 {
5628 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
5629 int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
5630 class ipcp_param_lattices *src_plats;
5631 HOST_WIDE_INT delta = ipa_get_jf_ancestor_offset (jfunc);
5632
5633 if (caller_info->ipcp_orig_node)
5634 {
5635 if (!inter.exists ())
5636 inter = agg_replacements_to_vector (cs->caller, src_idx, delta);
5637 else
5638 intersect_with_agg_replacements (cs->caller, src_idx, &inter,
5639 delta);
5640 }
5641 else
5642 {
5643 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
5644 /* Currently we do not produce clobber aggregate jump
5645 functions, adjust when we do. */
5646 gcc_checking_assert (!src_plats->aggs || !jfunc->agg.items);
5647 if (!inter.exists ())
5648 inter = copy_plats_to_inter (src_plats, delta);
5649 else
5650 intersect_with_plats (src_plats, &inter, delta);
5651 }
5652 return inter;
5653 }
5654
5655 if (jfunc->agg.items)
5656 {
5657 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
5658 struct ipa_agg_value *item;
5659 int k;
5660
5661 if (!inter.exists ())
5662 for (unsigned i = 0; i < jfunc->agg.items->length (); i++)
5663 {
5664 struct ipa_agg_jf_item *agg_item = &(*jfunc->agg.items)[i];
5665 tree value = ipa_agg_value_from_node (caller_info, cs->caller,
5666 agg_item);
5667 if (value)
5668 {
5669 struct ipa_agg_value agg_value;
5670
5671 agg_value.value = value;
5672 agg_value.offset = agg_item->offset;
5673 inter.safe_push (agg_value);
5674 }
5675 }
5676 else
5677 FOR_EACH_VEC_ELT (inter, k, item)
5678 {
5679 int l = 0;
5680 bool found = false;
5681
5682 if (!item->value)
5683 continue;
5684
5685 while ((unsigned) l < jfunc->agg.items->length ())
5686 {
5687 struct ipa_agg_jf_item *ti;
5688 ti = &(*jfunc->agg.items)[l];
5689 if (ti->offset > item->offset)
5690 break;
5691 if (ti->offset == item->offset)
5692 {
5693 tree value;
5694
5695 /* Besides simple pass-through aggregate jump function,
5696 arithmetic aggregate jump function could also bring
5697 same aggregate value as parameter passed-in for
5698 self-feeding recursive call. For example,
5699
5700 fn (int *i)
5701 {
5702 int j = *i & 1;
5703 fn (&j);
5704 }
5705
5706 Given that *i is 0, recursive propagation via (*i & 1)
5707 also gets 0. */
5708 if (self_recursive_agg_pass_through_p (cs, ti, index,
5709 false))
5710 value = ipa_get_jf_arith_result (
5711 ti->value.pass_through.operation,
5712 item->value,
5713 ti->value.pass_through.operand,
5714 ti->type);
5715 else
5716 value = ipa_agg_value_from_node (caller_info,
5717 cs->caller, ti);
5718
5719 if (value && values_equal_for_ipcp_p (item->value, value))
5720 found = true;
5721 break;
5722 }
5723 l++;
5724 }
5725 if (!found)
5726 item->value = NULL;
5727 }
5728 }
5729 else
5730 {
5731 inter.release ();
5732 return vNULL;
5733 }
5734 return inter;
5735 }
5736
5737 /* Look at edges in CALLERS and collect all known aggregate values that arrive
5738 from all of them. */
5739
5740 static struct ipa_agg_replacement_value *
5741 find_aggregate_values_for_callers_subset (struct cgraph_node *node,
5742 const vec<cgraph_edge *> &callers)
5743 {
5744 ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5745 struct ipa_agg_replacement_value *res;
5746 struct ipa_agg_replacement_value **tail = &res;
5747 struct cgraph_edge *cs;
5748 int i, j, count = ipa_get_param_count (dest_info);
5749
5750 FOR_EACH_VEC_ELT (callers, j, cs)
5751 {
5752 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5753 if (!args)
5754 {
5755 count = 0;
5756 break;
5757 }
5758 int c = ipa_get_cs_argument_count (args);
5759 if (c < count)
5760 count = c;
5761 }
5762
5763 for (i = 0; i < count; i++)
5764 {
5765 struct cgraph_edge *cs;
5766 vec<ipa_agg_value> inter = vNULL;
5767 struct ipa_agg_value *item;
5768 class ipcp_param_lattices *plats = ipa_get_parm_lattices (dest_info, i);
5769 int j;
5770
5771 /* Among other things, the following check should deal with all by_ref
5772 mismatches. */
5773 if (plats->aggs_bottom)
5774 continue;
5775
5776 FOR_EACH_VEC_ELT (callers, j, cs)
5777 {
5778 struct ipa_jump_func *jfunc
5779 = ipa_get_ith_jump_func (ipa_edge_args_sum->get (cs), i);
5780 if (self_recursive_pass_through_p (cs, jfunc, i)
5781 && (!plats->aggs_by_ref
5782 || ipa_get_jf_pass_through_agg_preserved (jfunc)))
5783 continue;
5784 inter = intersect_aggregates_with_edge (cs, i, inter);
5785
5786 if (!inter.exists ())
5787 goto next_param;
5788 }
5789
5790 FOR_EACH_VEC_ELT (inter, j, item)
5791 {
5792 struct ipa_agg_replacement_value *v;
5793
5794 if (!item->value)
5795 continue;
5796
5797 v = ggc_alloc<ipa_agg_replacement_value> ();
5798 v->index = i;
5799 v->offset = item->offset;
5800 v->value = item->value;
5801 v->by_ref = plats->aggs_by_ref;
5802 *tail = v;
5803 tail = &v->next;
5804 }
5805
5806 next_param:
5807 if (inter.exists ())
5808 inter.release ();
5809 }
5810 *tail = NULL;
5811 return res;
5812 }
5813
5814 /* Determine whether CS also brings all scalar values that the NODE is
5815 specialized for. */
5816
5817 static bool
5818 cgraph_edge_brings_all_scalars_for_node (struct cgraph_edge *cs,
5819 struct cgraph_node *node)
5820 {
5821 ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5822 int count = ipa_get_param_count (dest_info);
5823 class ipa_node_params *caller_info;
5824 class ipa_edge_args *args;
5825 int i;
5826
5827 caller_info = ipa_node_params_sum->get (cs->caller);
5828 args = ipa_edge_args_sum->get (cs);
5829 for (i = 0; i < count; i++)
5830 {
5831 struct ipa_jump_func *jump_func;
5832 tree val, t;
5833
5834 val = dest_info->known_csts[i];
5835 if (!val)
5836 continue;
5837
5838 if (i >= ipa_get_cs_argument_count (args))
5839 return false;
5840 jump_func = ipa_get_ith_jump_func (args, i);
5841 t = ipa_value_from_jfunc (caller_info, jump_func,
5842 ipa_get_type (dest_info, i));
5843 if (!t || !values_equal_for_ipcp_p (val, t))
5844 return false;
5845 }
5846 return true;
5847 }
5848
5849 /* Determine whether CS also brings all aggregate values that NODE is
5850 specialized for. */
5851 static bool
5852 cgraph_edge_brings_all_agg_vals_for_node (struct cgraph_edge *cs,
5853 struct cgraph_node *node)
5854 {
5855 struct ipa_agg_replacement_value *aggval;
5856 int i, ec, count;
5857
5858 aggval = ipa_get_agg_replacements_for_node (node);
5859 if (!aggval)
5860 return true;
5861
5862 ipa_node_params *clone_node_info = ipa_node_params_sum->get (node);
5863 count = ipa_get_param_count (clone_node_info);
5864 ec = ipa_get_cs_argument_count (ipa_edge_args_sum->get (cs));
5865 if (ec < count)
5866 for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next)
5867       if (av->index >= ec)
5868 return false;
5869
5870 ipa_node_params *orig_node_info
5871 = ipa_node_params_sum->get (clone_node_info->ipcp_orig_node);
5872
5873 for (i = 0; i < count; i++)
5874 {
5875 class ipcp_param_lattices *plats;
5876 bool interesting = false;
5877 for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next)
5878 	if (av->index == i)
5879 {
5880 interesting = true;
5881 break;
5882 }
5883 if (!interesting)
5884 continue;
5885
5886       plats = ipa_get_parm_lattices (orig_node_info, i);
5887 if (plats->aggs_bottom)
5888 return false;
5889
5890 vec<ipa_agg_value> values = intersect_aggregates_with_edge (cs, i, vNULL);
5891 if (!values.exists ())
5892 return false;
5893
5894 for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next)
5895 	if (av->index == i)
5896 {
5897 struct ipa_agg_value *item;
5898 int j;
5899 bool found = false;
5900 FOR_EACH_VEC_ELT (values, j, item)
5901 if (item->value
5902 && item->offset == av->offset
5903 && values_equal_for_ipcp_p (item->value, av->value))
5904 {
5905 found = true;
5906 break;
5907 }
5908 if (!found)
5909 {
5910 values.release ();
5911 return false;
5912 }
5913 }
5914 values.release ();
5915 }
5916 return true;
5917 }
5918
5919 /* Given an original NODE and a VAL for which we have already created a
5920 specialized clone, look whether there are incoming edges that still lead
5921 into the old node but now also bring the requested value and also conform to
5922    all other criteria such that they can be redirected to the specialized node.
5923 This function can therefore redirect the final edge in a SCC. */
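
/* For instance (a hypothetical two-function SCC):

     void f (int x) { ... g (x); ... }
     void g (int x) { ... f (x); ... }

   When both f and g are cloned for x being 4, the edge from g's clone back
   into the original f could not be redirected at the time f's clone was
   created; this function picks up such remaining edges.  */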
5924
5925 template <typename valtype>
5926 static void
5927 perhaps_add_new_callers (cgraph_node *node, ipcp_value<valtype> *val)
5928 {
5929 ipcp_value_source<valtype> *src;
5930 profile_count redirected_sum = profile_count::zero ();
5931
5932 for (src = val->sources; src; src = src->next)
5933 {
5934 struct cgraph_edge *cs = src->cs;
5935 while (cs)
5936 {
5937 if (cgraph_edge_brings_value_p (cs, src, node, val)
5938 && cgraph_edge_brings_all_scalars_for_node (cs, val->spec_node)
5939 && cgraph_edge_brings_all_agg_vals_for_node (cs, val->spec_node))
5940 {
5941 if (dump_file)
5942 fprintf (dump_file, " - adding an extra caller %s of %s\n",
5943 cs->caller->dump_name (),
5944 val->spec_node->dump_name ());
5945
5946 cs->redirect_callee_duplicating_thunks (val->spec_node);
5947 val->spec_node->expand_all_artificial_thunks ();
5948 if (cs->count.ipa ().initialized_p ())
5949 redirected_sum = redirected_sum + cs->count.ipa ();
5950 }
5951 cs = get_next_cgraph_edge_clone (cs);
5952 }
5953 }
5954
5955 if (redirected_sum.nonzero_p ())
5956 update_specialized_profile (val->spec_node, node, redirected_sum);
5957 }
5958
5959 /* Return true if KNOWN_CONTEXTS contains at least one useful context.  */
5960
5961 static bool
5962 known_contexts_useful_p (vec<ipa_polymorphic_call_context> known_contexts)
5963 {
5964 ipa_polymorphic_call_context *ctx;
5965 int i;
5966
5967 FOR_EACH_VEC_ELT (known_contexts, i, ctx)
5968 if (!ctx->useless_p ())
5969 return true;
5970 return false;
5971 }
5972
5973 /* Return a copy of KNOWN_CONTEXTS if it is useful, otherwise return vNULL.  */
5974
5975 static vec<ipa_polymorphic_call_context>
5976 copy_useful_known_contexts (const vec<ipa_polymorphic_call_context> &known_contexts)
5977 {
5978 if (known_contexts_useful_p (known_contexts))
5979 return known_contexts.copy ();
5980 else
5981 return vNULL;
5982 }
5983
5984 /* Copy known scalar values from AVALS into KNOWN_CSTS and modify the copy
5985 according to VAL and INDEX. If non-empty, replace KNOWN_CONTEXTS with its
5986 copy too. */
5987
5988 static void
5989 copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
5990 vec<tree> *known_csts,
5991 vec<ipa_polymorphic_call_context> *known_contexts,
5992 ipcp_value<tree> *val, int index)
5993 {
5994 *known_csts = avals->m_known_vals.copy ();
5995 *known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
5996 (*known_csts)[index] = val->value;
5997 }
5998
5999 /* Copy known scalar values from AVALS into KNOWN_CSTS. Similarly, copy
6000 contexts to KNOWN_CONTEXTS and modify the copy according to VAL and
6001 INDEX. */
6002
6003 static void
6004 copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
6005 vec<tree> *known_csts,
6006 vec<ipa_polymorphic_call_context> *known_contexts,
6007 ipcp_value<ipa_polymorphic_call_context> *val,
6008 int index)
6009 {
6010 *known_csts = avals->m_known_vals.copy ();
6011 *known_contexts = avals->m_known_contexts.copy ();
6012 (*known_contexts)[index] = val->value;
6013 }
6014
6015 /* Return true if OFFSET indicates this was not an aggregate value or there is
6016 a replacement equivalent to VALUE, INDEX and OFFSET among those in the
6017 AGGVALS list. */
6018
6019 DEBUG_FUNCTION bool
6020 ipcp_val_agg_replacement_ok_p (ipa_agg_replacement_value *aggvals,
6021 int index, HOST_WIDE_INT offset, tree value)
6022 {
6023 if (offset == -1)
6024 return true;
6025
6026 while (aggvals)
6027 {
6028 if (aggvals->index == index
6029 && aggvals->offset == offset
6030 && values_equal_for_ipcp_p (aggvals->value, value))
6031 return true;
6032 aggvals = aggvals->next;
6033 }
6034 return false;
6035 }
6036
6037 /* Return true if OFFSET is minus one; the source of a polymorphic context
6038    cannot be an aggregate value anyway.  */
6039
6040 DEBUG_FUNCTION bool
6041 ipcp_val_agg_replacement_ok_p (ipa_agg_replacement_value *,
6042 int , HOST_WIDE_INT offset,
6043 ipa_polymorphic_call_context)
6044 {
6045 return offset == -1;
6046 }
6047
6048 /* Decide whether to create a special version of NODE for value VAL of
6049 parameter at the given INDEX. If OFFSET is -1, the value is for the
6050 parameter itself, otherwise it is stored at the given OFFSET of the
6051 parameter. AVALS describes the other already known values. SELF_GEN_CLONES
6052 is a vector which contains clones created for self-recursive calls with an
6053 arithmetic pass-through jump function. */
6054
6055 template <typename valtype>
6056 static bool
6057 decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset,
6058 ipcp_value<valtype> *val, ipa_auto_call_arg_values *avals,
6059 vec<cgraph_node *> *self_gen_clones)
6060 {
6061 struct ipa_agg_replacement_value *aggvals;
6062 int caller_count;
6063 sreal freq_sum;
6064 profile_count count_sum, rec_count_sum;
6065 vec<cgraph_edge *> callers;
6066
6067 if (val->spec_node)
6068 {
6069 perhaps_add_new_callers (node, val);
6070 return false;
6071 }
6072 else if (val->local_size_cost + overall_size > get_max_overall_size (node))
6073 {
6074 if (dump_file && (dump_flags & TDF_DETAILS))
6075 fprintf (dump_file, " Ignoring candidate value because "
6076 "maximum unit size would be reached with %li.\n",
6077 val->local_size_cost + overall_size);
6078 return false;
6079 }
6080 else if (!get_info_about_necessary_edges (val, node, &freq_sum, &caller_count,
6081 &rec_count_sum, &count_sum))
6082 return false;
6083
6084 if (!dbg_cnt (ipa_cp_values))
6085 return false;
6086
6087 if (val->self_recursion_generated_p ())
6088 {
6089 /* The edge counts in this case might not have been adjusted yet.
6090 	 Nevertheless, even if they were, it would only be guesswork which we
6091 can do now. The recursive part of the counts can be derived from the
6092 count of the original node anyway. */
6093 if (node->count.ipa ().nonzero_p ())
6094 {
6095 unsigned dem = self_gen_clones->length () + 1;
6096 rec_count_sum = node->count.ipa ().apply_scale (1, dem);
6097 }
6098 else
6099 rec_count_sum = profile_count::zero ();
6100 }
6101
6102 /* get_info_about_necessary_edges only sums up ipa counts. */
6103 count_sum += rec_count_sum;
6104
6105 if (dump_file && (dump_flags & TDF_DETAILS))
6106 {
6107 fprintf (dump_file, " - considering value ");
6108 print_ipcp_constant_value (dump_file, val->value);
6109 fprintf (dump_file, " for ");
6110 ipa_dump_param (dump_file, ipa_node_params_sum->get (node), index);
6111 if (offset != -1)
6112 fprintf (dump_file, ", offset: " HOST_WIDE_INT_PRINT_DEC, offset);
6113 fprintf (dump_file, " (caller_count: %i)\n", caller_count);
6114 }
6115
6116 if (!good_cloning_opportunity_p (node, val->local_time_benefit,
6117 freq_sum, count_sum,
6118 val->local_size_cost)
6119 && !good_cloning_opportunity_p (node, val->prop_time_benefit,
6120 freq_sum, count_sum, val->prop_size_cost))
6121 return false;
6122
6123 if (dump_file)
6124 fprintf (dump_file, " Creating a specialized node of %s.\n",
6125 node->dump_name ());
6126
6127 vec<tree> known_csts;
6128 vec<ipa_polymorphic_call_context> known_contexts;
6129
6130 callers = gather_edges_for_value (val, node, caller_count);
6131 if (offset == -1)
6132 copy_known_vectors_add_val (avals, &known_csts, &known_contexts, val, index);
6133 else
6134 {
6135 known_csts = avals->m_known_vals.copy ();
6136 known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
6137 }
6138 find_more_scalar_values_for_callers_subset (node, known_csts, callers);
6139 find_more_contexts_for_caller_subset (node, &known_contexts, callers);
6140 aggvals = find_aggregate_values_for_callers_subset (node, callers);
6141 gcc_checking_assert (ipcp_val_agg_replacement_ok_p (aggvals, index,
6142 offset, val->value));
6143 val->spec_node = create_specialized_node (node, known_csts, known_contexts,
6144 aggvals, callers);
6145
6146 if (val->self_recursion_generated_p ())
6147 self_gen_clones->safe_push (val->spec_node);
6148 else
6149 update_profiling_info (node, val->spec_node);
6150
6151 callers.release ();
6152 overall_size += val->local_size_cost;
6153 if (dump_file && (dump_flags & TDF_DETAILS))
6154 fprintf (dump_file, " overall size reached %li\n",
6155 overall_size);
6156
6157 /* TODO: If for some lattice there is only one other known value
6158 left, make a special node for it too. */
6159
6160 return true;
6161 }
6162
6163 /* Decide whether and what specialized clones of NODE should be created. */
6164
6165 static bool
6166 decide_whether_version_node (struct cgraph_node *node)
6167 {
6168 ipa_node_params *info = ipa_node_params_sum->get (node);
6169 int i, count = ipa_get_param_count (info);
6170 bool ret = false;
6171
6172 if (count == 0)
6173 return false;
6174
6175 if (dump_file && (dump_flags & TDF_DETAILS))
6176 fprintf (dump_file, "\nEvaluating opportunities for %s.\n",
6177 node->dump_name ());
6178
6179 auto_vec <cgraph_node *, 9> self_gen_clones;
6180 ipa_auto_call_arg_values avals;
6181 gather_context_independent_values (info, &avals, false, NULL);
6182
6183   for (i = 0; i < count; i++)
6184 {
6185 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6186 ipcp_lattice<tree> *lat = &plats->itself;
6187 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
6188
6189 if (!lat->bottom
6190 && !avals.m_known_vals[i])
6191 {
6192 ipcp_value<tree> *val;
6193 for (val = lat->values; val; val = val->next)
6194 {
6195 /* If some values generated for self-recursive calls with
6196 arithmetic jump functions fall outside of the known
6197 value_range for the parameter, we can skip them. VR interface
6198 supports this only for integers now. */
6199 if (TREE_CODE (val->value) == INTEGER_CST
6200 && !plats->m_value_range.bottom_p ()
6201 && !plats->m_value_range.m_vr.contains_p (val->value))
6202 {
6203 /* This can happen also if a constant present in the source
6204 		     code falls outside of the range of the parameter's type, so we
6205 cannot assert. */
6206 if (dump_file && (dump_flags & TDF_DETAILS))
6207 {
6208 fprintf (dump_file, " - skipping%s value ",
6209 val->self_recursion_generated_p ()
6210 ? " self_recursion_generated" : "");
6211 print_ipcp_constant_value (dump_file, val->value);
6212 fprintf (dump_file, " because it is outside known "
6213 "value range.\n");
6214 }
6215 continue;
6216 }
6217 ret |= decide_about_value (node, i, -1, val, &avals,
6218 &self_gen_clones);
6219 }
6220 }
6221
6222 if (!plats->aggs_bottom)
6223 {
6224 struct ipcp_agg_lattice *aglat;
6225 ipcp_value<tree> *val;
6226 for (aglat = plats->aggs; aglat; aglat = aglat->next)
6227 if (!aglat->bottom && aglat->values
6228 /* If the following is false, the one value has been considered
6229 for cloning for all contexts. */
6230 && (plats->aggs_contain_variable
6231 || !aglat->is_single_const ()))
6232 for (val = aglat->values; val; val = val->next)
6233 ret |= decide_about_value (node, i, aglat->offset, val, &avals,
6234 &self_gen_clones);
6235 }
6236
6237 if (!ctxlat->bottom
6238 && avals.m_known_contexts[i].useless_p ())
6239 {
6240 ipcp_value<ipa_polymorphic_call_context> *val;
6241 for (val = ctxlat->values; val; val = val->next)
6242 ret |= decide_about_value (node, i, -1, val, &avals,
6243 &self_gen_clones);
6244 }
6245 }
6246
6247 if (!self_gen_clones.is_empty ())
6248 {
6249 self_gen_clones.safe_push (node);
6250 update_counts_for_self_gen_clones (node, self_gen_clones);
6251 }
6252
6253 if (info->do_clone_for_all_contexts)
6254 {
6255 if (!dbg_cnt (ipa_cp_values))
6256 {
6257 info->do_clone_for_all_contexts = false;
6258 return ret;
6259 }
6260
6261 struct cgraph_node *clone;
6262 auto_vec<cgraph_edge *> callers = node->collect_callers ();
6263
6264 for (int i = callers.length () - 1; i >= 0; i--)
6265 {
6266 cgraph_edge *cs = callers[i];
6267 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
6268
6269 if (caller_info && caller_info->node_dead)
6270 callers.unordered_remove (i);
6271 }
6272
6273 if (!adjust_callers_for_value_intersection (callers, node))
6274 {
6275 /* If node is not called by anyone, or all its caller edges are
6276 self-recursive, the node is not really in use, no need to do
6277 cloning. */
6278 info->do_clone_for_all_contexts = false;
6279 return ret;
6280 }
6281
6282 if (dump_file)
6283 fprintf (dump_file, " - Creating a specialized node of %s "
6284 "for all known contexts.\n", node->dump_name ());
6285
6286 vec<tree> known_csts = avals.m_known_vals.copy ();
6287 vec<ipa_polymorphic_call_context> known_contexts
6288 = copy_useful_known_contexts (avals.m_known_contexts);
6289 find_more_scalar_values_for_callers_subset (node, known_csts, callers);
6290 find_more_contexts_for_caller_subset (node, &known_contexts, callers);
6291 ipa_agg_replacement_value *aggvals
6292 = find_aggregate_values_for_callers_subset (node, callers);
6293
6294 if (!known_contexts_useful_p (known_contexts))
6295 {
6296 known_contexts.release ();
6297 known_contexts = vNULL;
6298 }
6299 clone = create_specialized_node (node, known_csts, known_contexts,
6300 aggvals, callers);
6301 info->do_clone_for_all_contexts = false;
6302 ipa_node_params_sum->get (clone)->is_all_contexts_clone = true;
6303 ret = true;
6304 }
6305
6306 return ret;
6307 }
6308
6309 /* Transitively mark all callees of NODE within the same SCC as not dead. */
6310
6311 static void
6312 spread_undeadness (struct cgraph_node *node)
6313 {
6314 struct cgraph_edge *cs;
6315
6316 for (cs = node->callees; cs; cs = cs->next_callee)
6317 if (ipa_edge_within_scc (cs))
6318 {
6319 struct cgraph_node *callee;
6320 class ipa_node_params *info;
6321
6322 callee = cs->callee->function_symbol (NULL);
6323 info = ipa_node_params_sum->get (callee);
6324
6325 if (info && info->node_dead)
6326 {
6327 info->node_dead = 0;
6328 spread_undeadness (callee);
6329 }
6330 }
6331 }
6332
6333 /* Return true if NODE has a caller from outside of its SCC that is not
6334 dead. Worker callback for cgraph_for_node_and_aliases. */
6335
6336 static bool
6337 has_undead_caller_from_outside_scc_p (struct cgraph_node *node,
6338 void *data ATTRIBUTE_UNUSED)
6339 {
6340 struct cgraph_edge *cs;
6341
6342 for (cs = node->callers; cs; cs = cs->next_caller)
6343 if (cs->caller->thunk
6344 && cs->caller->call_for_symbol_thunks_and_aliases
6345 (has_undead_caller_from_outside_scc_p, NULL, true))
6346 return true;
6347 else if (!ipa_edge_within_scc (cs))
6348 {
6349 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
6350       if (!caller_info /* Unoptimized callers are like dead ones.  */
6351 || !caller_info->node_dead)
6352 return true;
6353 }
6354 return false;
6355 }
6356
6357
6358 /* Identify nodes within the same SCC as NODE which are no longer needed
6359 because of new clones and will be removed as unreachable. */
6360
6361 static void
6362 identify_dead_nodes (struct cgraph_node *node)
6363 {
6364 struct cgraph_node *v;
6365 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6366 if (v->local)
6367 {
6368 ipa_node_params *info = ipa_node_params_sum->get (v);
6369 if (info
6370 && !v->call_for_symbol_thunks_and_aliases
6371 (has_undead_caller_from_outside_scc_p, NULL, true))
6372 info->node_dead = 1;
6373 }
6374
6375 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6376 {
6377 ipa_node_params *info = ipa_node_params_sum->get (v);
6378 if (info && !info->node_dead)
6379 spread_undeadness (v);
6380 }
6381
6382 if (dump_file && (dump_flags & TDF_DETAILS))
6383 {
6384 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6385 if (ipa_node_params_sum->get (v)
6386 && ipa_node_params_sum->get (v)->node_dead)
6387 fprintf (dump_file, " Marking node as dead: %s.\n",
6388 v->dump_name ());
6389 }
6390 }
6391
6392 /* The decision stage. Iterate over the topological order of call graph nodes
6393 TOPO and make specialized clones if deemed beneficial. */
6394
6395 static void
6396 ipcp_decision_stage (class ipa_topo_info *topo)
6397 {
6398 int i;
6399
6400 if (dump_file)
6401 fprintf (dump_file, "\nIPA decision stage:\n\n");
6402
6403 for (i = topo->nnodes - 1; i >= 0; i--)
6404 {
6405 struct cgraph_node *node = topo->order[i];
6406 bool change = false, iterate = true;
6407
6408 while (iterate)
6409 {
6410 struct cgraph_node *v;
6411 iterate = false;
6412 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6413 if (v->has_gimple_body_p ()
6414 && ipcp_versionable_function_p (v))
6415 iterate |= decide_whether_version_node (v);
6416
6417 change |= iterate;
6418 }
6419 if (change)
6420 identify_dead_nodes (node);
6421 }
6422 }
6423
6424 /* Look up all the bits information that we have discovered and copy it over
6425 to the transformation summary. */
6426
6427 static void
6428 ipcp_store_bits_results (void)
6429 {
6430 cgraph_node *node;
6431
6432 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6433 {
6434 ipa_node_params *info = ipa_node_params_sum->get (node);
6435 bool dumped_sth = false;
6436 bool found_useful_result = false;
6437
6438 if (!opt_for_fn (node->decl, flag_ipa_bit_cp) || !info)
6439 {
6440 if (dump_file)
6441 	  fprintf (dump_file, "Not considering %s for ipa bitwise propagation"
6442 		   "; -fipa-bit-cp: disabled.\n",
6443 node->dump_name ());
6444 continue;
6445 }
6446
6447 if (info->ipcp_orig_node)
6448 info = ipa_node_params_sum->get (info->ipcp_orig_node);
6449 if (!info->lattices)
6450 /* Newly expanded artificial thunks do not have lattices. */
6451 continue;
6452
6453 unsigned count = ipa_get_param_count (info);
6454 for (unsigned i = 0; i < count; i++)
6455 {
6456 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6457 if (plats->bits_lattice.constant_p ())
6458 {
6459 found_useful_result = true;
6460 break;
6461 }
6462 }
6463
6464 if (!found_useful_result)
6465 continue;
6466
6467 ipcp_transformation_initialize ();
6468 ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
6469 vec_safe_reserve_exact (ts->bits, count);
6470
6471 for (unsigned i = 0; i < count; i++)
6472 {
6473 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6474 ipa_bits *jfbits;
6475
6476 if (plats->bits_lattice.constant_p ())
6477 {
6478 jfbits
6479 = ipa_get_ipa_bits_for_value (plats->bits_lattice.get_value (),
6480 plats->bits_lattice.get_mask ());
6481 if (!dbg_cnt (ipa_cp_bits))
6482 jfbits = NULL;
6483 }
6484 else
6485 jfbits = NULL;
6486
6487 ts->bits->quick_push (jfbits);
6488 if (!dump_file || !jfbits)
6489 continue;
6490 if (!dumped_sth)
6491 {
6492 fprintf (dump_file, "Propagated bits info for function %s:\n",
6493 node->dump_name ());
6494 dumped_sth = true;
6495 }
6496 fprintf (dump_file, " param %i: value = ", i);
6497 print_hex (jfbits->value, dump_file);
6498 fprintf (dump_file, ", mask = ");
6499 print_hex (jfbits->mask, dump_file);
6500 fprintf (dump_file, "\n");
6501 }
6502 }
6503 }
6504
6505 /* Look up all VR information that we have discovered and copy it over
6506 to the transformation summary. */
6507
6508 static void
6509 ipcp_store_vr_results (void)
6510 {
6511 cgraph_node *node;
6512
6513 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6514 {
6515 ipa_node_params *info = ipa_node_params_sum->get (node);
6516 bool found_useful_result = false;
6517
6518 if (!info || !opt_for_fn (node->decl, flag_ipa_vrp))
6519 {
6520 if (dump_file)
6521 fprintf (dump_file, "Not considering %s for VR discovery "
6522 		   "and propagation; -fipa-vrp: disabled.\n",
6523 node->dump_name ());
6524 continue;
6525 }
6526
6527 if (info->ipcp_orig_node)
6528 info = ipa_node_params_sum->get (info->ipcp_orig_node);
6529 if (!info->lattices)
6530 /* Newly expanded artificial thunks do not have lattices. */
6531 continue;
6532
6533 unsigned count = ipa_get_param_count (info);
6534 for (unsigned i = 0; i < count; i++)
6535 {
6536 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6537 if (!plats->m_value_range.bottom_p ()
6538 && !plats->m_value_range.top_p ())
6539 {
6540 found_useful_result = true;
6541 break;
6542 }
6543 }
6544 if (!found_useful_result)
6545 continue;
6546
6547 ipcp_transformation_initialize ();
6548 ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
6549 vec_safe_reserve_exact (ts->m_vr, count);
6550
6551 for (unsigned i = 0; i < count; i++)
6552 {
6553 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6554 ipa_vr vr;
6555
6556 if (!plats->m_value_range.bottom_p ()
6557 && !plats->m_value_range.top_p ()
6558 && dbg_cnt (ipa_cp_vr))
6559 {
6560 vr.known = true;
6561 vr.type = plats->m_value_range.m_vr.kind ();
6562 vr.min = wi::to_wide (plats->m_value_range.m_vr.min ());
6563 vr.max = wi::to_wide (plats->m_value_range.m_vr.max ());
6564 }
6565 else
6566 {
6567 vr.known = false;
6568 vr.type = VR_VARYING;
6569 vr.min = vr.max = wi::zero (INT_TYPE_SIZE);
6570 }
6571 ts->m_vr->quick_push (vr);
6572 }
6573 }
6574 }
6575
6576 /* The IPCP driver. */
6577
6578 static unsigned int
6579 ipcp_driver (void)
6580 {
6581 class ipa_topo_info topo;
6582
6583 if (edge_clone_summaries == NULL)
6584 edge_clone_summaries = new edge_clone_summary_t (symtab);
6585
6586 ipa_check_create_node_params ();
6587 ipa_check_create_edge_args ();
6588 clone_num_suffixes = new hash_map<const char *, unsigned>;
6589
6590 if (dump_file)
6591 {
6592 fprintf (dump_file, "\nIPA structures before propagation:\n");
6593 if (dump_flags & TDF_DETAILS)
6594 ipa_print_all_params (dump_file);
6595 ipa_print_all_jump_functions (dump_file);
6596 }
6597
6598 /* Topological sort. */
6599 build_toporder_info (&topo);
6600 /* Do the interprocedural propagation. */
6601 ipcp_propagate_stage (&topo);
6602 /* Decide what constant propagation and cloning should be performed. */
6603 ipcp_decision_stage (&topo);
6604 /* Store results of bits propagation. */
6605 ipcp_store_bits_results ();
6606 /* Store results of value range propagation. */
6607 ipcp_store_vr_results ();
6608
6609 /* Free all IPCP structures. */
6610 delete clone_num_suffixes;
6611 free_toporder_info (&topo);
6612 delete edge_clone_summaries;
6613 edge_clone_summaries = NULL;
6614 ipa_free_all_structures_after_ipa_cp ();
6615 if (dump_file)
6616 fprintf (dump_file, "\nIPA constant propagation end\n");
6617 return 0;
6618 }
6619
6620 /* Initialization and computation of IPCP data structures. This is the initial
6621 intraprocedural analysis of functions, which gathers information to be
6622 propagated later on. */
6623
6624 static void
6625 ipcp_generate_summary (void)
6626 {
6627 struct cgraph_node *node;
6628
6629 if (dump_file)
6630 fprintf (dump_file, "\nIPA constant propagation start:\n");
6631 ipa_register_cgraph_hooks ();
6632
6633 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6634 ipa_analyze_node (node);
6635 }
6636
6637 namespace {
6638
6639 const pass_data pass_data_ipa_cp =
6640 {
6641 IPA_PASS, /* type */
6642 "cp", /* name */
6643 OPTGROUP_NONE, /* optinfo_flags */
6644 TV_IPA_CONSTANT_PROP, /* tv_id */
6645 0, /* properties_required */
6646 0, /* properties_provided */
6647 0, /* properties_destroyed */
6648 0, /* todo_flags_start */
6649 ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
6650 };
6651
6652 class pass_ipa_cp : public ipa_opt_pass_d
6653 {
6654 public:
6655   pass_ipa_cp (gcc::context *ctxt)
6656 : ipa_opt_pass_d (pass_data_ipa_cp, ctxt,
6657 ipcp_generate_summary, /* generate_summary */
6658 NULL, /* write_summary */
6659 NULL, /* read_summary */
6660 ipcp_write_transformation_summaries, /*
6661 write_optimization_summary */
6662 ipcp_read_transformation_summaries, /*
6663 read_optimization_summary */
6664 NULL, /* stmt_fixup */
6665 0, /* function_transform_todo_flags_start */
6666 ipcp_transform_function, /* function_transform */
6667 NULL) /* variable_transform */
6668 {}
6669
6670 /* opt_pass methods: */
6671   virtual bool gate (function *)
6672 {
6673 /* FIXME: We should remove the optimize check after we ensure we never run
6674 IPA passes when not optimizing. */
6675 return (flag_ipa_cp && optimize) || in_lto_p;
6676 }
6677
6678   virtual unsigned int execute (function *) { return ipcp_driver (); }
6679
6680 }; // class pass_ipa_cp
6681
6682 } // anon namespace
6683
6684 ipa_opt_pass_d *
6685 make_pass_ipa_cp (gcc::context *ctxt)
6686 {
6687 return new pass_ipa_cp (ctxt);
6688 }
6689
6690 /* Reset all state within ipa-cp.cc so that we can rerun the compiler
6691 within the same process. For use by toplev::finalize. */
6692
6693 void
6694 ipa_cp_cc_finalize (void)
6695 {
6696 base_count = profile_count::uninitialized ();
6697 overall_size = 0;
6698 orig_overall_size = 0;
6699 ipcp_free_transformation_sum ();
6700 }
6701