1 /* IPA function body analysis.
2    Copyright (C) 2003-2022 Free Software Foundation, Inc.
3    Contributed by Jan Hubicka
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #ifndef GCC_IPA_SUMMARY_H
22 #define GCC_IPA_SUMMARY_H
23 
24 #include "sreal.h"
25 #include "ipa-predicate.h"
26 
27 
28 /* Hints are reasons why IPA heuristics should prefer specializing given
29    function.  They are represented as bitmap of the following values.  */
30 enum ipa_hints_vals {
31   /* When specialization turns indirect call into a direct call,
32      it is good idea to do so.  */
33   INLINE_HINT_indirect_call = 1,
34   /* Inlining may make loop iterations or loop stride known.  It is good idea
35      to do so because it enables loop optimizations.  */
36   INLINE_HINT_loop_iterations = 2,
37   INLINE_HINT_loop_stride = 4,
38   /* Inlining within same strongly connected component of callgraph is often
39      a loss due to increased stack frame usage and prologue setup costs.  */
40   INLINE_HINT_same_scc = 8,
41   /* Inlining functions in strongly connected component is not such a great
42      win.  */
43   INLINE_HINT_in_scc = 16,
44   /* If function is declared inline by user, it may be good idea to inline
45      it.  Set by simple_edge_hints in ipa-inline-analysis.cc.  */
46   INLINE_HINT_declared_inline = 32,
47   /* Programs are usually still organized for non-LTO compilation and thus
48      if functions are in different modules, inlining may not be so important.
49      Set by simple_edge_hints in ipa-inline-analysis.cc.   */
50   INLINE_HINT_cross_module = 64,
51   /* We know that the callee is hot by profile.  */
52   INLINE_HINT_known_hot = 128,
53   /* There is builtin_constant_p dependent on parameter which is usually
54      a strong hint to inline.  */
55   INLINE_HINT_builtin_constant_p = 256
56 };
57 
58 typedef int ipa_hints;
59 
60 /* Simple description of whether a memory load or a condition refers to a load
61    from an aggregate and if so, how and where from in the aggregate.
62    Individual fields have the same meaning like fields with the same name in
63    struct condition.  */
64 
65 struct agg_position_info
66 {
67   HOST_WIDE_INT offset;
68   bool agg_contents;
69   bool by_ref;
70 };
71 
72 /* Representation of function body size and time depending on the call
73    context.  We keep simple array of record, every containing of predicate
74    and time/size to account.  */
75 class size_time_entry
76 {
77 public:
78   /* Predicate for code to be executed.  */
79   ipa_predicate exec_predicate;
80   /* Predicate for value to be constant and optimized out in a specialized copy.
81      When deciding on specialization this makes it possible to see how much
82      the executed code paths will simplify.  */
83   ipa_predicate nonconst_predicate;
84   int size;
85   sreal time;
86 };
87 
88 /* Summary about function and stack frame sizes.  We keep this info
89    for inline clones and also for WPA streaming. For this reason this is not
90    part of ipa_fn_summary which exists only for offline functions.  */
91 class ipa_size_summary
92 {
93 public:
94   /* Estimated stack frame consumption by the function.  */
95   HOST_WIDE_INT estimated_self_stack_size;
96   /* Size of the function body.  */
97   int self_size;
98   /* Estimated size of the function after inlining.  */
99   int size;
100 
ipa_size_summary()101   ipa_size_summary ()
102   : estimated_self_stack_size (0), self_size (0), size (0)
103   {
104   }
105 };
106 
107 /* Structure to capture how frequently some interesting events occur given a
108    particular predicate.  The structure is used to estimate how often we
109    encounter loops with known iteration count or stride in various
110    contexts.  */
111 
112 struct GTY(()) ipa_freqcounting_predicate
113 {
114   /* The described event happens with this frequency... */
115   sreal freq;
116   /* ...when this predicate evaluates to false. */
117   ipa_predicate * GTY((skip)) predicate;
118 };
119 
120 /* Function inlining information.  */
class()121 class GTY(()) ipa_fn_summary
122 {
123 public:
124   /* Keep all field empty so summary dumping works during its computation.
125      This is useful for debugging.  */
126   ipa_fn_summary ()
127     : min_size (0),
128       inlinable (false), single_caller (false),
129       fp_expressions (false), target_info (0),
130       estimated_stack_size (false),
131       time (0), conds (NULL),
132       size_time_table (), call_size_time_table (vNULL),
133       loop_iterations (NULL), loop_strides (NULL),
134       builtin_constant_p_parms (vNULL),
135       growth (0), scc_no (0)
136   {
137   }
138 
139   /* Copy constructor.  */
140   ipa_fn_summary (const ipa_fn_summary &s)
141     : min_size (s.min_size),
142     inlinable (s.inlinable), single_caller (s.single_caller),
143     fp_expressions (s.fp_expressions),
144     target_info (s.target_info),
145     estimated_stack_size (s.estimated_stack_size),
146     time (s.time), conds (s.conds), size_time_table (),
147     call_size_time_table (vNULL),
148     loop_iterations (s.loop_iterations), loop_strides (s.loop_strides),
149     builtin_constant_p_parms (s.builtin_constant_p_parms),
150     growth (s.growth), scc_no (s.scc_no)
151   {}
152 
153   /* Default constructor.  */
154   ~ipa_fn_summary ();
155 
156   /* Information about the function body itself.  */
157 
158   /* Minimal size increase after inlining.  */
159   int min_size;
160 
161   /* False when there something makes inlining impossible (such as va_arg).  */
162   unsigned inlinable : 1;
163   /* True wen there is only one caller of the function before small function
164      inlining.  */
165   unsigned int single_caller : 1;
166   /* True if function contains any floating point expressions.  */
167   unsigned int fp_expressions : 1;
168   /* Like fp_expressions field above, but it's to hold some target specific
169      information, such as some target specific isa flags.  Note that for
170      offloading target compilers, this field isn't streamed.  */
171   unsigned int target_info;
172 
173   /* Information about function that will result after applying all the
174      inline decisions present in the callgraph.  Generally kept up to
175      date only for functions that are not inline clones. */
176 
177   /* Estimated stack frame consumption by the function.  */
178   HOST_WIDE_INT estimated_stack_size;
179   /* Estimated runtime of function after inlining.  */
180   sreal GTY((skip)) time;
181 
182   /* Conditional size/time information.  The summaries are being
183      merged during inlining.  */
184   conditions conds;
185   /* Normal code is accounted in size_time_table, while calls are
186      accounted in call_size_time_table.  This is because calls
187      are often adjusted by IPA optimizations and thus this summary
188      is generated from call summary information when needed.  */
189   auto_vec<size_time_entry> GTY((skip)) size_time_table;
190   /* Unlike size_time_table that is initialized for all summaries
191      call_size_time_table is allocated only for functions with
192      many calls.  Use effecient vl_ptr storage.  */
193   vec<size_time_entry, va_heap, vl_ptr> GTY((skip)) call_size_time_table;
194 
195   /* Predicates on when some loops in the function can have known bounds.  */
196   vec<ipa_freqcounting_predicate, va_gc> *loop_iterations;
197   /* Predicates on when some loops in the function can have known strides.  */
198   vec<ipa_freqcounting_predicate, va_gc> *loop_strides;
199   /* Parameters tested by builtin_constant_p.  */
200   vec<int, va_heap, vl_ptr> GTY((skip)) builtin_constant_p_parms;
201   /* Estimated growth for inlining all copies of the function before start
202      of small functions inlining.
203      This value will get out of date as the callers are duplicated, but
204      using up-to-date value in the badness metric mean a lot of extra
205      expenses.  */
206   int growth;
207   /* Number of SCC on the beginning of inlining process.  */
208   int scc_no;
209 
210   /* Record time and size under given predicates.  */
211   void account_size_time (int, sreal, const ipa_predicate &,
212 			  const ipa_predicate &,
213 		  	  bool call = false);
214 
215   /* We keep values scaled up, so fractional sizes can be accounted.  */
216   static const int size_scale = 2;
217   /* Maximal size of size_time_table before we start to be conservative.  */
218   static const int max_size_time_table_size = 256;
219 };
220 
class(user)221 class GTY((user)) ipa_fn_summary_t:
222   public fast_function_summary <ipa_fn_summary *, va_gc>
223 {
224 public:
225   ipa_fn_summary_t (symbol_table *symtab):
226     fast_function_summary <ipa_fn_summary *, va_gc> (symtab) {}
227 
228   static ipa_fn_summary_t *create_ggc (symbol_table *symtab)
229   {
230     class ipa_fn_summary_t *summary
231       = new (ggc_alloc_no_dtor<ipa_fn_summary_t> ()) ipa_fn_summary_t (symtab);
232     summary->disable_insertion_hook ();
233     return summary;
234   }
235 
236   /* Remove ipa_fn_summary for all callees of NODE.  */
237   void remove_callees (cgraph_node *node);
238 
239   virtual void insert (cgraph_node *, ipa_fn_summary *);
240   virtual void remove (cgraph_node *node, ipa_fn_summary *)
241   {
242     remove_callees (node);
243   }
244 
245   virtual void duplicate (cgraph_node *src, cgraph_node *dst,
246 			  ipa_fn_summary *src_data, ipa_fn_summary *dst_data);
247 };
248 
249 extern GTY(()) fast_function_summary <ipa_fn_summary *, va_gc>
250   *ipa_fn_summaries;
251 
252 class ipa_size_summary_t:
253   public fast_function_summary <ipa_size_summary *, va_heap>
254 {
255 public:
ipa_size_summary_t(symbol_table * symtab)256   ipa_size_summary_t (symbol_table *symtab):
257     fast_function_summary <ipa_size_summary *, va_heap> (symtab)
258   {
259     disable_insertion_hook ();
260   }
261 
duplicate(cgraph_node *,cgraph_node *,ipa_size_summary * src_data,ipa_size_summary * dst_data)262   virtual void duplicate (cgraph_node *, cgraph_node *,
263 			  ipa_size_summary *src_data,
264 			  ipa_size_summary *dst_data)
265   {
266     *dst_data = *src_data;
267   }
268 };
269 extern fast_function_summary <ipa_size_summary *, va_heap>
270   *ipa_size_summaries;
271 
272 /* Information kept about callgraph edges.  */
273 class ipa_call_summary
274 {
275 public:
276   /* Keep all field empty so summary dumping works during its computation.
277      This is useful for debugging.  */
ipa_call_summary()278   ipa_call_summary ()
279     : predicate (NULL), param (vNULL), call_stmt_size (0), call_stmt_time (0),
280       loop_depth (0), is_return_callee_uncaptured (false)
281     {
282     }
283 
284   /* Copy constructor.  */
ipa_call_summary(const ipa_call_summary & s)285   ipa_call_summary (const ipa_call_summary &s):
286     predicate (s.predicate), param (s.param), call_stmt_size (s.call_stmt_size),
287     call_stmt_time (s.call_stmt_time), loop_depth (s.loop_depth),
288     is_return_callee_uncaptured (s.is_return_callee_uncaptured)
289   {
290   }
291 
292   /* Default destructor.  */
293   ~ipa_call_summary ();
294 
295   ipa_predicate *predicate;
296   /* Vector indexed by parameters.  */
297   vec<inline_param_summary> param;
298   /* Estimated size and time of the call statement.  */
299   int call_stmt_size;
300   int call_stmt_time;
301   /* Depth of loop nest, 0 means no nesting.  */
302   unsigned int loop_depth;
303   /* Indicates whether the caller returns the value of it's callee.  */
304   bool is_return_callee_uncaptured;
305 };
306 
307 class ipa_call_summary_t: public fast_call_summary <ipa_call_summary *, va_heap>
308 {
309 public:
ipa_call_summary_t(symbol_table * symtab)310   ipa_call_summary_t (symbol_table *symtab):
311     fast_call_summary <ipa_call_summary *, va_heap> (symtab) {}
312 
313   /* Hook that is called by summary when an edge is duplicated.  */
314   virtual void duplicate (cgraph_edge *src, cgraph_edge *dst,
315 			  ipa_call_summary *src_data,
316 			  ipa_call_summary *dst_data);
317 };
318 
319 /* Estimated execution times, code sizes and other information about the
320    code executing a call described by ipa_call_context.  */
321 
322 struct ipa_call_estimates
323 {
324   /* Estimated size needed to execute call in the given context. */
325   int size;
326 
327   /* Minimal size needed for the call that is + independent on the call context
328      and can be used for fast estimates.  */
329   int min_size;
330 
331   /* Estimated time needed to execute call in the given context. */
332   sreal time;
333 
334   /* Estimated time needed to execute the function when not ignoring
335      computations known to be constant in this context.  */
336   sreal nonspecialized_time;
337 
338   /* Further discovered reasons why to inline or specialize the give calls.  */
339   ipa_hints hints;
340 
341   /* Frequency how often a loop with known number of iterations is encountered.
342      Calculated with hints.  */
343   sreal loops_with_known_iterations;
344 
345   /* Frequency how often a loop with known strides is encountered.  Calculated
346      with hints.  */
347   sreal loops_with_known_strides;
348 };
349 
350 class ipa_cached_call_context;
351 
352 /* This object describe a context of call.  That is a summary of known
353    information about its parameters.  Main purpose of this context is
354    to give more realistic estimations of function runtime, size and
355    inline hints.  */
356 class ipa_call_context
357 {
358 public:
359   ipa_call_context (cgraph_node *node,
360       		    clause_t possible_truths,
361 		    clause_t nonspec_possible_truths,
362 		    vec<inline_param_summary> inline_param_summary,
363 		    ipa_auto_call_arg_values *arg_values);
ipa_call_context()364   ipa_call_context ()
365   : m_node(NULL)
366   {
367   }
368   void estimate_size_and_time (ipa_call_estimates *estimates,
369 			       bool est_times = true, bool est_hints = true);
370   bool equal_to (const ipa_call_context &);
exists_p()371   bool exists_p ()
372   {
373     return m_node != NULL;
374   }
375 private:
376   /* Called function.  */
377   cgraph_node *m_node;
378   /* Clause describing what predicate conditionals can be satisfied
379      in this context if function is inlined/specialized.  */
380   clause_t m_possible_truths;
381   /* Clause describing what predicate conditionals can be satisfied
382      in this context if function is kept offline.  */
383   clause_t m_nonspec_possible_truths;
384   /* Inline summary maintains info about change probabilities.  */
385   vec<inline_param_summary> m_inline_param_summary;
386 
387   /* Even after having calculated clauses, the information about argument
388      values is used to resolve indirect calls.  */
389   ipa_call_arg_values m_avals;
390 
391   friend ipa_cached_call_context;
392 };
393 
394 /* Variant of ipa_call_context that is stored in a cache over a longer period
395    of time.  */
396 
397 class ipa_cached_call_context : public ipa_call_context
398 {
399 public:
400   void duplicate_from (const ipa_call_context &ctx);
401   void release ();
402 };
403 
404 extern fast_call_summary <ipa_call_summary *, va_heap> *ipa_call_summaries;
405 
406 /* In ipa-fnsummary.cc  */
407 void ipa_debug_fn_summary (struct cgraph_node *);
408 void ipa_dump_fn_summaries (FILE *f);
409 void ipa_dump_fn_summary (FILE *f, struct cgraph_node *node);
410 void ipa_dump_hints (FILE *f, ipa_hints);
411 void ipa_free_fn_summary (void);
412 void ipa_free_size_summary (void);
413 void inline_analyze_function (struct cgraph_node *node);
414 void estimate_ipcp_clone_size_and_time (struct cgraph_node *node,
415 					ipa_auto_call_arg_values *avals,
416 					ipa_call_estimates *estimates);
417 void ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge);
418 void ipa_update_overall_fn_summary (struct cgraph_node *node, bool reset = true);
419 void compute_fn_summary (struct cgraph_node *, bool);
420 bool refs_local_or_readonly_memory_p (tree);
421 bool points_to_local_or_readonly_memory_p (tree);
422 
423 
424 void evaluate_properties_for_edge (struct cgraph_edge *e,
425 	       		           bool inline_p,
426 				   clause_t *clause_ptr,
427 				   clause_t *nonspec_clause_ptr,
428 				   ipa_auto_call_arg_values *avals,
429 				   bool compute_contexts);
430 
431 void ipa_fnsummary_cc_finalize (void);
432 HOST_WIDE_INT ipa_get_stack_frame_offset (struct cgraph_node *node);
433 void ipa_remove_from_growth_caches (struct cgraph_edge *edge);
434 
435 /* Return true if EDGE is a cross module call.  */
436 
437 static inline bool
cross_module_call_p(struct cgraph_edge * edge)438 cross_module_call_p (struct cgraph_edge *edge)
439 {
440   /* Here we do not want to walk to alias target becuase ICF may create
441      cross-unit aliases.  */
442   if (edge->caller->unit_id == edge->callee->unit_id)
443     return false;
444   /* If the call is to a (former) comdat function or s symbol with mutiple
445      extern inline definitions then treat is as in-module call.  */
446   if (edge->callee->merged_extern_inline || edge->callee->merged_comdat
447       || DECL_COMDAT (edge->callee->decl))
448     return false;
449   return true;
450 }
451 
452 #endif /* GCC_IPA_FNSUMMARY_H */
453