1 /* Basic IPA optimizations based on profile.
2    Copyright (C) 2003-2021 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 /* ipa-profile pass implements the following analysis propagating profille
21    inter-procedurally.
22 
23    - Count histogram construction.  This is a histogram analyzing how much
24      time is spent executing statements with a given execution count read
25      from profile feedback. This histogram is complete only with LTO,
26      otherwise it contains information only about the current unit.
27 
28      The information is used to set hot/cold thresholds.
29    - Next speculative indirect call resolution is performed:  the local
30      profile pass assigns profile-id to each function and provide us with a
31      histogram specifying the most common target.  We look up the callgraph
32      node corresponding to the target and produce a speculative call.
33 
34      This call may or may not survive through IPA optimization based on decision
35      of inliner.
36    - Finally we propagate the following flags: unlikely executed, executed
37      once, executed at startup and executed at exit.  These flags are used to
38      control code size/performance threshold and code placement (by producing
39      .text.unlikely/.text.hot/.text.startup/.text.exit subsections).  */
40 #include "config.h"
41 #include "system.h"
42 #include "coretypes.h"
43 #include "backend.h"
44 #include "tree.h"
45 #include "gimple.h"
46 #include "predict.h"
47 #include "alloc-pool.h"
48 #include "tree-pass.h"
49 #include "cgraph.h"
50 #include "data-streamer.h"
51 #include "gimple-iterator.h"
52 #include "ipa-utils.h"
53 #include "profile.h"
54 #include "value-prof.h"
55 #include "tree-inline.h"
56 #include "symbol-summary.h"
57 #include "tree-vrp.h"
58 #include "ipa-prop.h"
59 #include "ipa-fnsummary.h"
60 
61 /* Entry in the histogram.  */
62 
63 struct histogram_entry
64 {
65   gcov_type count;
66   int time;
67   int size;
68 };
69 
70 /* Histogram of profile values.
71    The histogram is represented as an ordered vector of entries allocated via
72    histogram_pool. During construction a separate hashtable is kept to lookup
73    duplicate entries.  */
74 
75 vec<histogram_entry *> histogram;
76 static object_allocator<histogram_entry> histogram_pool ("IPA histogram");
77 
78 /* Hashtable support for storing SSA names hashed by their SSA_NAME_VAR.  */
79 
80 struct histogram_hash : nofree_ptr_hash <histogram_entry>
81 {
82   static inline hashval_t hash (const histogram_entry *);
83   static inline int equal (const histogram_entry *, const histogram_entry *);
84 };
85 
86 inline hashval_t
hash(const histogram_entry * val)87 histogram_hash::hash (const histogram_entry *val)
88 {
89   return val->count;
90 }
91 
92 inline int
equal(const histogram_entry * val,const histogram_entry * val2)93 histogram_hash::equal (const histogram_entry *val, const histogram_entry *val2)
94 {
95   return val->count == val2->count;
96 }
97 
98 /* Account TIME and SIZE executed COUNT times into HISTOGRAM.
99    HASHTABLE is the on-side hash kept to avoid duplicates.  */
100 
101 static void
account_time_size(hash_table<histogram_hash> * hashtable,vec<histogram_entry * > & histogram,gcov_type count,int time,int size)102 account_time_size (hash_table<histogram_hash> *hashtable,
103 		   vec<histogram_entry *> &histogram,
104 		   gcov_type count, int time, int size)
105 {
106   histogram_entry key = {count, 0, 0};
107   histogram_entry **val = hashtable->find_slot (&key, INSERT);
108 
109   if (!*val)
110     {
111       *val = histogram_pool.allocate ();
112       **val = key;
113       histogram.safe_push (*val);
114     }
115   (*val)->time += time;
116   (*val)->size += size;
117 }
118 
119 int
cmp_counts(const void * v1,const void * v2)120 cmp_counts (const void *v1, const void *v2)
121 {
122   const histogram_entry *h1 = *(const histogram_entry * const *)v1;
123   const histogram_entry *h2 = *(const histogram_entry * const *)v2;
124   if (h1->count < h2->count)
125     return 1;
126   if (h1->count > h2->count)
127     return -1;
128   return 0;
129 }
130 
131 /* Dump HISTOGRAM to FILE.  */
132 
133 static void
dump_histogram(FILE * file,vec<histogram_entry * > histogram)134 dump_histogram (FILE *file, vec<histogram_entry *> histogram)
135 {
136   unsigned int i;
137   gcov_type overall_time = 0, cumulated_time = 0, cumulated_size = 0,
138 	    overall_size = 0;
139 
140   fprintf (dump_file, "Histogram:\n");
141   for (i = 0; i < histogram.length (); i++)
142     {
143       overall_time += histogram[i]->count * histogram[i]->time;
144       overall_size += histogram[i]->size;
145     }
146   if (!overall_time)
147     overall_time = 1;
148   if (!overall_size)
149     overall_size = 1;
150   for (i = 0; i < histogram.length (); i++)
151     {
152       cumulated_time += histogram[i]->count * histogram[i]->time;
153       cumulated_size += histogram[i]->size;
154       fprintf (file, "  %" PRId64": time:%i (%2.2f) size:%i (%2.2f)\n",
155 	       (int64_t) histogram[i]->count,
156 	       histogram[i]->time,
157 	       cumulated_time * 100.0 / overall_time,
158 	       histogram[i]->size,
159 	       cumulated_size * 100.0 / overall_size);
160    }
161 }
162 
163 /* Structure containing speculative target information from profile.  */
164 
165 struct speculative_call_target
166 {
167   speculative_call_target (unsigned int id = 0, int prob = 0)
target_idspeculative_call_target168     : target_id (id), target_probability (prob)
169   {
170   }
171 
172   /* Profile_id of target obtained from profile.  */
173   unsigned int target_id;
174   /* Probability that call will land in function with target_id.  */
175   unsigned int target_probability;
176 };
177 
178 class speculative_call_summary
179 {
180 public:
speculative_call_summary()181   speculative_call_summary () : speculative_call_targets ()
182   {}
183 
184   auto_vec<speculative_call_target> speculative_call_targets;
185 
186   void dump (FILE *f);
187 
188 };
189 
190   /* Class to manage call summaries.  */
191 
192 class ipa_profile_call_summaries
193   : public call_summary<speculative_call_summary *>
194 {
195 public:
ipa_profile_call_summaries(symbol_table * table)196   ipa_profile_call_summaries (symbol_table *table)
197     : call_summary<speculative_call_summary *> (table)
198   {}
199 
200   /* Duplicate info when an edge is cloned.  */
201   virtual void duplicate (cgraph_edge *, cgraph_edge *,
202 			  speculative_call_summary *old_sum,
203 			  speculative_call_summary *new_sum);
204 };
205 
206 static ipa_profile_call_summaries *call_sums = NULL;
207 
208 /* Dump all information in speculative call summary to F.  */
209 
210 void
dump(FILE * f)211 speculative_call_summary::dump (FILE *f)
212 {
213   cgraph_node *n2;
214 
215   unsigned spec_count = speculative_call_targets.length ();
216   for (unsigned i = 0; i < spec_count; i++)
217     {
218       speculative_call_target item = speculative_call_targets[i];
219       n2 = find_func_by_profile_id (item.target_id);
220       if (n2)
221 	fprintf (f, "    The %i speculative target is %s with prob %3.2f\n", i,
222 		 n2->dump_name (),
223 		 item.target_probability / (float) REG_BR_PROB_BASE);
224       else
225 	fprintf (f, "    The %i speculative target is %u with prob %3.2f\n", i,
226 		 item.target_id,
227 		 item.target_probability / (float) REG_BR_PROB_BASE);
228     }
229 }
230 
231 /* Duplicate info when an edge is cloned.  */
232 
233 void
duplicate(cgraph_edge *,cgraph_edge *,speculative_call_summary * old_sum,speculative_call_summary * new_sum)234 ipa_profile_call_summaries::duplicate (cgraph_edge *, cgraph_edge *,
235 				       speculative_call_summary *old_sum,
236 				       speculative_call_summary *new_sum)
237 {
238   if (!old_sum)
239     return;
240 
241   unsigned old_count = old_sum->speculative_call_targets.length ();
242   if (!old_count)
243     return;
244 
245   new_sum->speculative_call_targets.reserve_exact (old_count);
246   new_sum->speculative_call_targets.quick_grow_cleared (old_count);
247 
248   for (unsigned i = 0; i < old_count; i++)
249     {
250       new_sum->speculative_call_targets[i]
251 	= old_sum->speculative_call_targets[i];
252     }
253 }
254 
255 /* Collect histogram and speculative target summaries from CFG profiles.  */
256 
257 static void
ipa_profile_generate_summary(void)258 ipa_profile_generate_summary (void)
259 {
260   struct cgraph_node *node;
261   gimple_stmt_iterator gsi;
262   basic_block bb;
263 
264   hash_table<histogram_hash> hashtable (10);
265 
266   gcc_checking_assert (!call_sums);
267   call_sums = new ipa_profile_call_summaries (symtab);
268 
269   FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
270     if (ENTRY_BLOCK_PTR_FOR_FN
271 	  (DECL_STRUCT_FUNCTION (node->decl))->count.ipa_p ())
272       FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
273 	{
274 	  int time = 0;
275 	  int size = 0;
276 	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
277 	    {
278 	      gimple *stmt = gsi_stmt (gsi);
279 	      if (gimple_code (stmt) == GIMPLE_CALL
280 		  && !gimple_call_fndecl (stmt))
281 		{
282 		  histogram_value h;
283 		  h = gimple_histogram_value_of_type
284 			(DECL_STRUCT_FUNCTION (node->decl),
285 			 stmt, HIST_TYPE_INDIR_CALL);
286 		  /* No need to do sanity check: gimple_ic_transform already
287 		     takes away bad histograms.  */
288 		  if (h)
289 		    {
290 		      gcov_type val, count, all;
291 		      struct cgraph_edge *e = node->get_edge (stmt);
292 		      if (e && !e->indirect_unknown_callee)
293 			continue;
294 
295 		      speculative_call_summary *csum
296 			= call_sums->get_create (e);
297 
298 		      for (unsigned j = 0; j < GCOV_TOPN_MAXIMUM_TRACKED_VALUES;
299 			   j++)
300 			{
301 			  if (!get_nth_most_common_value (NULL, "indirect call",
302 							  h, &val, &count, &all,
303 							  j))
304 			    continue;
305 
306 			  if (val == 0 || count == 0)
307 			    continue;
308 
309 			  if (count > all)
310 			    {
311 			      if (dump_file)
312 				fprintf (dump_file,
313 					 "Probability capped to 1\n");
314 			      count = all;
315 			    }
316 			  speculative_call_target item (
317 			    val, GCOV_COMPUTE_SCALE (count, all));
318 			  csum->speculative_call_targets.safe_push (item);
319 			}
320 
321 		      gimple_remove_histogram_value
322 			 (DECL_STRUCT_FUNCTION (node->decl), stmt, h);
323 		    }
324 		}
325 	      time += estimate_num_insns (stmt, &eni_time_weights);
326 	      size += estimate_num_insns (stmt, &eni_size_weights);
327 	    }
328 	  if (bb->count.ipa_p () && bb->count.initialized_p ())
329 	    account_time_size (&hashtable, histogram,
330 			       bb->count.ipa ().to_gcov_type (),
331 			       time, size);
332 	}
333   histogram.qsort (cmp_counts);
334 }
335 
336 /* Serialize the speculative summary info for LTO.  */
337 
338 static void
ipa_profile_write_edge_summary(lto_simple_output_block * ob,speculative_call_summary * csum)339 ipa_profile_write_edge_summary (lto_simple_output_block *ob,
340 				speculative_call_summary *csum)
341 {
342   unsigned len = 0;
343 
344   len = csum->speculative_call_targets.length ();
345 
346   gcc_assert (len <= GCOV_TOPN_MAXIMUM_TRACKED_VALUES);
347 
348   streamer_write_hwi_stream (ob->main_stream, len);
349 
350   if (len)
351     {
352       unsigned spec_count = csum->speculative_call_targets.length ();
353       for (unsigned i = 0; i < spec_count; i++)
354 	{
355 	  speculative_call_target item = csum->speculative_call_targets[i];
356 	  gcc_assert (item.target_id);
357 	  streamer_write_hwi_stream (ob->main_stream, item.target_id);
358 	  streamer_write_hwi_stream (ob->main_stream, item.target_probability);
359 	}
360     }
361 }
362 
363 /* Serialize the ipa info for lto.  */
364 
365 static void
ipa_profile_write_summary(void)366 ipa_profile_write_summary (void)
367 {
368   struct lto_simple_output_block *ob
369     = lto_create_simple_output_block (LTO_section_ipa_profile);
370   unsigned int i;
371 
372   streamer_write_uhwi_stream (ob->main_stream, histogram.length ());
373   for (i = 0; i < histogram.length (); i++)
374     {
375       streamer_write_gcov_count_stream (ob->main_stream, histogram[i]->count);
376       streamer_write_uhwi_stream (ob->main_stream, histogram[i]->time);
377       streamer_write_uhwi_stream (ob->main_stream, histogram[i]->size);
378     }
379 
380   if (!call_sums)
381     return;
382 
383   /* Serialize speculative targets information.  */
384   unsigned int count = 0;
385   lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
386   lto_symtab_encoder_iterator lsei;
387   cgraph_node *node;
388 
389   for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
390        lsei_next_function_in_partition (&lsei))
391     {
392       node = lsei_cgraph_node (lsei);
393       if (node->definition && node->has_gimple_body_p ()
394 	  && node->indirect_calls)
395 	count++;
396     }
397 
398   streamer_write_uhwi_stream (ob->main_stream, count);
399 
400   /* Process all of the functions.  */
401   for (lsei = lsei_start_function_in_partition (encoder);
402        !lsei_end_p (lsei) && count; lsei_next_function_in_partition (&lsei))
403     {
404       cgraph_node *node = lsei_cgraph_node (lsei);
405       if (node->definition && node->has_gimple_body_p ()
406 	  && node->indirect_calls)
407 	{
408 	  int node_ref = lto_symtab_encoder_encode (encoder, node);
409 	  streamer_write_uhwi_stream (ob->main_stream, node_ref);
410 
411 	  for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
412 	    {
413 	      speculative_call_summary *csum = call_sums->get_create (e);
414 	      ipa_profile_write_edge_summary (ob, csum);
415 	    }
416       }
417     }
418 
419   lto_destroy_simple_output_block (ob);
420 }
421 
422 /* Dump all profile summary data for all cgraph nodes and edges to file F.  */
423 
424 static void
ipa_profile_dump_all_summaries(FILE * f)425 ipa_profile_dump_all_summaries (FILE *f)
426 {
427   fprintf (dump_file,
428 	   "\n========== IPA-profile speculative targets: ==========\n");
429   cgraph_node *node;
430   FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
431     {
432       fprintf (f, "\nSummary for node %s:\n", node->dump_name ());
433       for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
434 	{
435 	  fprintf (f, "  Summary for %s of indirect edge %d:\n",
436 		   e->caller->dump_name (), e->lto_stmt_uid);
437 	  speculative_call_summary *csum = call_sums->get_create (e);
438 	  csum->dump (f);
439 	}
440     }
441   fprintf (f, "\n\n");
442 }
443 
444 /* Read speculative targets information about edge for LTO WPA.  */
445 
446 static void
ipa_profile_read_edge_summary(class lto_input_block * ib,cgraph_edge * edge)447 ipa_profile_read_edge_summary (class lto_input_block *ib, cgraph_edge *edge)
448 {
449   unsigned i, len;
450 
451   len = streamer_read_hwi (ib);
452   gcc_assert (len <= GCOV_TOPN_MAXIMUM_TRACKED_VALUES);
453   speculative_call_summary *csum = call_sums->get_create (edge);
454 
455   for (i = 0; i < len; i++)
456   {
457     unsigned int target_id = streamer_read_hwi (ib);
458     int target_probability = streamer_read_hwi (ib);
459     speculative_call_target item (target_id, target_probability);
460     csum->speculative_call_targets.safe_push (item);
461   }
462 }
463 
464 /* Read profile speculative targets section information for LTO WPA.  */
465 
466 static void
ipa_profile_read_summary_section(struct lto_file_decl_data * file_data,class lto_input_block * ib)467 ipa_profile_read_summary_section (struct lto_file_decl_data *file_data,
468 				  class lto_input_block *ib)
469 {
470   if (!ib)
471     return;
472 
473   lto_symtab_encoder_t encoder = file_data->symtab_node_encoder;
474 
475   unsigned int count = streamer_read_uhwi (ib);
476 
477   unsigned int i;
478   unsigned int index;
479   cgraph_node * node;
480 
481   for (i = 0; i < count; i++)
482     {
483       index = streamer_read_uhwi (ib);
484       encoder = file_data->symtab_node_encoder;
485       node
486 	= dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder, index));
487 
488       for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
489 	ipa_profile_read_edge_summary (ib, e);
490     }
491 }
492 
493 /* Deserialize the IPA histogram and speculative targets summary info for LTO.
494    */
495 
496 static void
ipa_profile_read_summary(void)497 ipa_profile_read_summary (void)
498 {
499   struct lto_file_decl_data ** file_data_vec
500     = lto_get_file_decl_data ();
501   struct lto_file_decl_data * file_data;
502   int j = 0;
503 
504   hash_table<histogram_hash> hashtable (10);
505 
506   gcc_checking_assert (!call_sums);
507   call_sums = new ipa_profile_call_summaries (symtab);
508 
509   while ((file_data = file_data_vec[j++]))
510     {
511       const char *data;
512       size_t len;
513       class lto_input_block *ib
514 	= lto_create_simple_input_block (file_data,
515 					 LTO_section_ipa_profile,
516 					 &data, &len);
517       if (ib)
518 	{
519           unsigned int num = streamer_read_uhwi (ib);
520 	  unsigned int n;
521 	  for (n = 0; n < num; n++)
522 	    {
523 	      gcov_type count = streamer_read_gcov_count (ib);
524 	      int time = streamer_read_uhwi (ib);
525 	      int size = streamer_read_uhwi (ib);
526 	      account_time_size (&hashtable, histogram,
527 				 count, time, size);
528 	    }
529 
530 	  ipa_profile_read_summary_section (file_data, ib);
531 
532 	  lto_destroy_simple_input_block (file_data,
533 					  LTO_section_ipa_profile,
534 					  ib, data, len);
535 	}
536     }
537   histogram.qsort (cmp_counts);
538 }
539 
540 /* Data used by ipa_propagate_frequency.  */
541 
542 struct ipa_propagate_frequency_data
543 {
544   cgraph_node *function_symbol;
545   bool maybe_unlikely_executed;
546   bool maybe_executed_once;
547   bool only_called_at_startup;
548   bool only_called_at_exit;
549 };
550 
551 /* Worker for ipa_propagate_frequency_1.  */
552 
553 static bool
ipa_propagate_frequency_1(struct cgraph_node * node,void * data)554 ipa_propagate_frequency_1 (struct cgraph_node *node, void *data)
555 {
556   struct ipa_propagate_frequency_data *d;
557   struct cgraph_edge *edge;
558 
559   d = (struct ipa_propagate_frequency_data *)data;
560   for (edge = node->callers;
561        edge && (d->maybe_unlikely_executed || d->maybe_executed_once
562 	        || d->only_called_at_startup || d->only_called_at_exit);
563        edge = edge->next_caller)
564     {
565       if (edge->caller != d->function_symbol)
566 	{
567           d->only_called_at_startup &= edge->caller->only_called_at_startup;
568 	  /* It makes sense to put main() together with the static constructors.
569 	     It will be executed for sure, but rest of functions called from
570 	     main are definitely not at startup only.  */
571 	  if (MAIN_NAME_P (DECL_NAME (edge->caller->decl)))
572 	    d->only_called_at_startup = 0;
573           d->only_called_at_exit &= edge->caller->only_called_at_exit;
574 	}
575 
576       /* When profile feedback is available, do not try to propagate too hard;
577 	 counts are already good guide on function frequencies and roundoff
578 	 errors can make us to push function into unlikely section even when
579 	 it is executed by the train run.  Transfer the function only if all
580 	 callers are unlikely executed.  */
581       if (profile_info
582 	  && !(edge->callee->count.ipa () == profile_count::zero ())
583 	  && (edge->caller->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED
584 	      || (edge->caller->inlined_to
585 		  && edge->caller->inlined_to->frequency
586 		     != NODE_FREQUENCY_UNLIKELY_EXECUTED)))
587 	  d->maybe_unlikely_executed = false;
588       if (edge->count.ipa ().initialized_p ()
589 	  && !edge->count.ipa ().nonzero_p ())
590 	continue;
591       switch (edge->caller->frequency)
592         {
593 	case NODE_FREQUENCY_UNLIKELY_EXECUTED:
594 	  break;
595 	case NODE_FREQUENCY_EXECUTED_ONCE:
596 	  {
597 	    if (dump_file && (dump_flags & TDF_DETAILS))
598 	      fprintf (dump_file, "  Called by %s that is executed once\n",
599 		       edge->caller->dump_name ());
600 	    d->maybe_unlikely_executed = false;
601 	    ipa_call_summary *s = ipa_call_summaries->get (edge);
602 	    if (s != NULL && s->loop_depth)
603 	      {
604 		d->maybe_executed_once = false;
605 		if (dump_file && (dump_flags & TDF_DETAILS))
606 		  fprintf (dump_file, "  Called in loop\n");
607 	      }
608 	    break;
609 	  }
610 	case NODE_FREQUENCY_HOT:
611 	case NODE_FREQUENCY_NORMAL:
612 	  if (dump_file && (dump_flags & TDF_DETAILS))
613 	    fprintf (dump_file, "  Called by %s that is normal or hot\n",
614 		     edge->caller->dump_name ());
615 	  d->maybe_unlikely_executed = false;
616 	  d->maybe_executed_once = false;
617 	  break;
618 	}
619     }
620   return edge != NULL;
621 }
622 
623 /* Return ture if NODE contains hot calls.  */
624 
625 bool
contains_hot_call_p(struct cgraph_node * node)626 contains_hot_call_p (struct cgraph_node *node)
627 {
628   struct cgraph_edge *e;
629   for (e = node->callees; e; e = e->next_callee)
630     if (e->maybe_hot_p ())
631       return true;
632     else if (!e->inline_failed
633 	     && contains_hot_call_p (e->callee))
634       return true;
635   for (e = node->indirect_calls; e; e = e->next_callee)
636     if (e->maybe_hot_p ())
637       return true;
638   return false;
639 }
640 
641 /* See if the frequency of NODE can be updated based on frequencies of its
642    callers.  */
643 bool
ipa_propagate_frequency(struct cgraph_node * node)644 ipa_propagate_frequency (struct cgraph_node *node)
645 {
646   struct ipa_propagate_frequency_data d = {node, true, true, true, true};
647   bool changed = false;
648 
649   /* We cannot propagate anything useful about externally visible functions
650      nor about virtuals.  */
651   if (!node->local
652       || node->alias
653       || (opt_for_fn (node->decl, flag_devirtualize)
654 	  && DECL_VIRTUAL_P (node->decl)))
655     return false;
656   gcc_assert (node->analyzed);
657   if (dump_file && (dump_flags & TDF_DETAILS))
658     fprintf (dump_file, "Processing frequency %s\n", node->dump_name ());
659 
660   node->call_for_symbol_and_aliases (ipa_propagate_frequency_1, &d,
661 				     true);
662 
663   if ((d.only_called_at_startup && !d.only_called_at_exit)
664       && !node->only_called_at_startup)
665     {
666        node->only_called_at_startup = true;
667        if (dump_file)
668          fprintf (dump_file, "Node %s promoted to only called at startup.\n",
669 		  node->dump_name ());
670        changed = true;
671     }
672   if ((d.only_called_at_exit && !d.only_called_at_startup)
673       && !node->only_called_at_exit)
674     {
675        node->only_called_at_exit = true;
676        if (dump_file)
677          fprintf (dump_file, "Node %s promoted to only called at exit.\n",
678 		  node->dump_name ());
679        changed = true;
680     }
681 
682   /* With profile we can decide on hot/normal based on count.  */
683   if (node->count. ipa().initialized_p ())
684     {
685       bool hot = false;
686       if (!(node->count. ipa() == profile_count::zero ())
687 	  && node->count. ipa() >= get_hot_bb_threshold ())
688 	hot = true;
689       if (!hot)
690 	hot |= contains_hot_call_p (node);
691       if (hot)
692 	{
693 	  if (node->frequency != NODE_FREQUENCY_HOT)
694 	    {
695 	      if (dump_file)
696 		fprintf (dump_file, "Node %s promoted to hot.\n",
697 			 node->dump_name ());
698 	      node->frequency = NODE_FREQUENCY_HOT;
699 	      return true;
700 	    }
701 	  return false;
702 	}
703       else if (node->frequency == NODE_FREQUENCY_HOT)
704 	{
705 	  if (dump_file)
706 	    fprintf (dump_file, "Node %s reduced to normal.\n",
707 		     node->dump_name ());
708 	  node->frequency = NODE_FREQUENCY_NORMAL;
709 	  changed = true;
710 	}
711     }
712   /* These come either from profile or user hints; never update them.  */
713   if (node->frequency == NODE_FREQUENCY_HOT
714       || node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
715     return changed;
716   if (d.maybe_unlikely_executed)
717     {
718       node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
719       if (dump_file)
720 	fprintf (dump_file, "Node %s promoted to unlikely executed.\n",
721 		 node->dump_name ());
722       changed = true;
723     }
724   else if (d.maybe_executed_once && node->frequency != NODE_FREQUENCY_EXECUTED_ONCE)
725     {
726       node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
727       if (dump_file)
728 	fprintf (dump_file, "Node %s promoted to executed once.\n",
729 		 node->dump_name ());
730       changed = true;
731     }
732   return changed;
733 }
734 
735 /* Check that number of arguments of N agrees with E.
736    Be conservative when summaries are not present.  */
737 
738 static bool
check_argument_count(struct cgraph_node * n,struct cgraph_edge * e)739 check_argument_count (struct cgraph_node *n, struct cgraph_edge *e)
740 {
741   if (!ipa_node_params_sum || !ipa_edge_args_sum)
742     return true;
743   class ipa_node_params *info = IPA_NODE_REF (n->function_symbol ());
744   if (!info)
745     return true;
746   ipa_edge_args *e_info = IPA_EDGE_REF (e);
747   if (!e_info)
748     return true;
749   if (ipa_get_param_count (info) != ipa_get_cs_argument_count (e_info)
750       && (ipa_get_param_count (info) >= ipa_get_cs_argument_count (e_info)
751 	  || !stdarg_p (TREE_TYPE (n->decl))))
752     return false;
753   return true;
754 }
755 
756 /* Simple ipa profile pass propagating frequencies across the callgraph.  */
757 
758 static unsigned int
ipa_profile(void)759 ipa_profile (void)
760 {
761   struct cgraph_node **order;
762   struct cgraph_edge *e;
763   int order_pos;
764   bool something_changed = false;
765   int i;
766   gcov_type overall_time = 0, cutoff = 0, cumulated = 0, overall_size = 0;
767   struct cgraph_node *n,*n2;
768   int nindirect = 0, ncommon = 0, nunknown = 0, nuseless = 0, nconverted = 0;
769   int nmismatch = 0, nimpossible = 0;
770   bool node_map_initialized = false;
771   gcov_type threshold;
772 
773   if (dump_file)
774     dump_histogram (dump_file, histogram);
775   for (i = 0; i < (int)histogram.length (); i++)
776     {
777       overall_time += histogram[i]->count * histogram[i]->time;
778       overall_size += histogram[i]->size;
779     }
780   threshold = 0;
781   if (overall_time)
782     {
783       gcc_assert (overall_size);
784 
785       cutoff = (overall_time * param_hot_bb_count_ws_permille + 500) / 1000;
786       for (i = 0; cumulated < cutoff; i++)
787 	{
788 	  cumulated += histogram[i]->count * histogram[i]->time;
789           threshold = histogram[i]->count;
790 	}
791       if (!threshold)
792 	threshold = 1;
793       if (dump_file)
794 	{
795 	  gcov_type cumulated_time = 0, cumulated_size = 0;
796 
797           for (i = 0;
798 	       i < (int)histogram.length () && histogram[i]->count >= threshold;
799 	       i++)
800 	    {
801 	      cumulated_time += histogram[i]->count * histogram[i]->time;
802 	      cumulated_size += histogram[i]->size;
803 	    }
804 	  fprintf (dump_file, "Determined min count: %" PRId64
805 		   " Time:%3.2f%% Size:%3.2f%%\n",
806 		   (int64_t)threshold,
807 		   cumulated_time * 100.0 / overall_time,
808 		   cumulated_size * 100.0 / overall_size);
809 	}
810 
811       if (in_lto_p)
812 	{
813 	  if (dump_file)
814 	    fprintf (dump_file, "Setting hotness threshold in LTO mode.\n");
815           set_hot_bb_threshold (threshold);
816 	}
817     }
818   histogram.release ();
819   histogram_pool.release ();
820 
821   /* Produce speculative calls: we saved common target from profiling into
822      e->target_id.  Now, at link time, we can look up corresponding
823      function node and produce speculative call.  */
824 
825   gcc_checking_assert (call_sums);
826 
827   if (dump_file)
828     {
829       if (!node_map_initialized)
830 	init_node_map (false);
831       node_map_initialized = true;
832 
833       ipa_profile_dump_all_summaries (dump_file);
834     }
835 
836   FOR_EACH_DEFINED_FUNCTION (n)
837     {
838       bool update = false;
839 
840       if (!opt_for_fn (n->decl, flag_ipa_profile))
841 	continue;
842 
843       for (e = n->indirect_calls; e; e = e->next_callee)
844 	{
845 	  if (n->count.initialized_p ())
846 	    nindirect++;
847 
848 	  speculative_call_summary *csum = call_sums->get_create (e);
849 	  unsigned spec_count = csum->speculative_call_targets.length ();
850 	  if (spec_count)
851 	    {
852 	      if (!node_map_initialized)
853 		init_node_map (false);
854 	      node_map_initialized = true;
855 	      ncommon++;
856 
857 	      if (in_lto_p)
858 		{
859 		  if (dump_file)
860 		    {
861 		      fprintf (dump_file,
862 			       "Updating hotness threshold in LTO mode.\n");
863 		      fprintf (dump_file, "Updated min count: %" PRId64 "\n",
864 			       (int64_t) threshold / spec_count);
865 		    }
866 		  set_hot_bb_threshold (threshold / spec_count);
867 		}
868 
869 	      unsigned speculative_id = 0;
870 	      profile_count orig = e->count;
871 	      for (unsigned i = 0; i < spec_count; i++)
872 		{
873 		  speculative_call_target item
874 		    = csum->speculative_call_targets[i];
875 		  n2 = find_func_by_profile_id (item.target_id);
876 		  if (n2)
877 		    {
878 		      if (dump_file)
879 			{
880 			  fprintf (dump_file,
881 				   "Indirect call -> direct call from"
882 				   " other module %s => %s, prob %3.2f\n",
883 				   n->dump_name (),
884 				   n2->dump_name (),
885 				   item.target_probability
886 				     / (float) REG_BR_PROB_BASE);
887 			}
888 		      if (item.target_probability < REG_BR_PROB_BASE / 2)
889 			{
890 			  nuseless++;
891 			  if (dump_file)
892 			    fprintf (dump_file,
893 				     "Not speculating: "
894 				     "probability is too low.\n");
895 			}
896 		      else if (!e->maybe_hot_p ())
897 			{
898 			  nuseless++;
899 			  if (dump_file)
900 			    fprintf (dump_file,
901 				     "Not speculating: call is cold.\n");
902 			}
903 		      else if (n2->get_availability () <= AVAIL_INTERPOSABLE
904 			       && n2->can_be_discarded_p ())
905 			{
906 			  nuseless++;
907 			  if (dump_file)
908 			    fprintf (dump_file,
909 				     "Not speculating: target is overwritable "
910 				     "and can be discarded.\n");
911 			}
912 		      else if (!check_argument_count (n2, e))
913 			{
914 			  nmismatch++;
915 			  if (dump_file)
916 			    fprintf (dump_file,
917 				     "Not speculating: "
918 				     "parameter count mismatch\n");
919 			}
920 		      else if (e->indirect_info->polymorphic
921 			       && !opt_for_fn (n->decl, flag_devirtualize)
922 			       && !possible_polymorphic_call_target_p (e, n2))
923 			{
924 			  nimpossible++;
925 			  if (dump_file)
926 			    fprintf (dump_file,
927 				     "Not speculating: "
928 				     "function is not in the polymorphic "
929 				     "call target list\n");
930 			}
931 		      else
932 			{
933 			  /* Target may be overwritable, but profile says that
934 			     control flow goes to this particular implementation
935 			     of N2.  Speculate on the local alias to allow
936 			     inlining.  */
937 			  if (!n2->can_be_discarded_p ())
938 			    {
939 			      cgraph_node *alias;
940 			      alias = dyn_cast<cgraph_node *>
941 				   (n2->noninterposable_alias ());
942 			      if (alias)
943 				n2 = alias;
944 			    }
945 			  nconverted++;
946 			  profile_probability prob
947 				 = profile_probability::from_reg_br_prob_base
948 					(item.target_probability).adjusted ();
949 			  e->make_speculative (n2,
950 					       orig.apply_probability (prob),
951 					       speculative_id);
952 			  update = true;
953 			  speculative_id++;
954 			}
955 		    }
956 		  else
957 		    {
958 		      if (dump_file)
959 			fprintf (dump_file,
960 				 "Function with profile-id %i not found.\n",
961 				 item.target_id);
962 		      nunknown++;
963 		    }
964 		}
965 	    }
966 	}
967       if (update)
968 	ipa_update_overall_fn_summary (n);
969     }
970   if (node_map_initialized)
971     del_node_map ();
972   if (dump_file && nindirect)
973     fprintf (dump_file,
974 	     "%i indirect calls trained.\n"
975 	     "%i (%3.2f%%) have common target.\n"
976 	     "%i (%3.2f%%) targets was not found.\n"
977 	     "%i (%3.2f%%) targets had parameter count mismatch.\n"
978 	     "%i (%3.2f%%) targets was not in polymorphic call target list.\n"
979 	     "%i (%3.2f%%) speculations seems useless.\n"
980 	     "%i (%3.2f%%) speculations produced.\n",
981 	     nindirect,
982 	     ncommon, ncommon * 100.0 / nindirect,
983 	     nunknown, nunknown * 100.0 / nindirect,
984 	     nmismatch, nmismatch * 100.0 / nindirect,
985 	     nimpossible, nimpossible * 100.0 / nindirect,
986 	     nuseless, nuseless * 100.0 / nindirect,
987 	     nconverted, nconverted * 100.0 / nindirect);
988 
989   order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
990   order_pos = ipa_reverse_postorder (order);
991   for (i = order_pos - 1; i >= 0; i--)
992     {
993       if (order[i]->local
994 	  && opt_for_fn (order[i]->decl, flag_ipa_profile)
995 	  && ipa_propagate_frequency (order[i]))
996 	{
997 	  for (e = order[i]->callees; e; e = e->next_callee)
998 	    if (e->callee->local && !e->callee->aux)
999 	      {
1000 	        something_changed = true;
1001 	        e->callee->aux = (void *)1;
1002 	      }
1003 	}
1004       order[i]->aux = NULL;
1005     }
1006 
1007   while (something_changed)
1008     {
1009       something_changed = false;
1010       for (i = order_pos - 1; i >= 0; i--)
1011 	{
1012 	  if (order[i]->aux
1013 	      && opt_for_fn (order[i]->decl, flag_ipa_profile)
1014 	      && ipa_propagate_frequency (order[i]))
1015 	    {
1016 	      for (e = order[i]->callees; e; e = e->next_callee)
1017 		if (e->callee->local && !e->callee->aux)
1018 		  {
1019 		    something_changed = true;
1020 		    e->callee->aux = (void *)1;
1021 		  }
1022 	    }
1023 	  order[i]->aux = NULL;
1024 	}
1025     }
1026   free (order);
1027 
1028   if (dump_file && (dump_flags & TDF_DETAILS))
1029     symtab->dump (dump_file);
1030 
1031   delete call_sums;
1032   call_sums = NULL;
1033 
1034   return 0;
1035 }
1036 
1037 namespace {
1038 
1039 const pass_data pass_data_ipa_profile =
1040 {
1041   IPA_PASS, /* type */
1042   "profile_estimate", /* name */
1043   OPTGROUP_NONE, /* optinfo_flags */
1044   TV_IPA_PROFILE, /* tv_id */
1045   0, /* properties_required */
1046   0, /* properties_provided */
1047   0, /* properties_destroyed */
1048   0, /* todo_flags_start */
1049   0, /* todo_flags_finish */
1050 };
1051 
1052 class pass_ipa_profile : public ipa_opt_pass_d
1053 {
1054 public:
pass_ipa_profile(gcc::context * ctxt)1055   pass_ipa_profile (gcc::context *ctxt)
1056     : ipa_opt_pass_d (pass_data_ipa_profile, ctxt,
1057 		      ipa_profile_generate_summary, /* generate_summary */
1058 		      ipa_profile_write_summary, /* write_summary */
1059 		      ipa_profile_read_summary, /* read_summary */
1060 		      NULL, /* write_optimization_summary */
1061 		      NULL, /* read_optimization_summary */
1062 		      NULL, /* stmt_fixup */
1063 		      0, /* function_transform_todo_flags_start */
1064 		      NULL, /* function_transform */
1065 		      NULL) /* variable_transform */
1066   {}
1067 
1068   /* opt_pass methods: */
gate(function *)1069   virtual bool gate (function *) { return flag_ipa_profile || in_lto_p; }
execute(function *)1070   virtual unsigned int execute (function *) { return ipa_profile (); }
1071 
1072 }; // class pass_ipa_profile
1073 
1074 } // anon namespace
1075 
1076 ipa_opt_pass_d *
make_pass_ipa_profile(gcc::context * ctxt)1077 make_pass_ipa_profile (gcc::context *ctxt)
1078 {
1079   return new pass_ipa_profile (ctxt);
1080 }
1081