1 /* Read and annotate call graph profile from the auto profile data file.
2    Copyright (C) 2014-2020 Free Software Foundation, Inc.
3    Contributed by Dehao Chen (dehao@google.com)
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #define INCLUDE_MAP
23 #define INCLUDE_SET
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "predict.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "ssa.h"
33 #include "cgraph.h"
34 #include "gcov-io.h"
35 #include "diagnostic-core.h"
36 #include "profile.h"
37 #include "langhooks.h"
38 #include "cfgloop.h"
39 #include "tree-cfg.h"
40 #include "tree-cfgcleanup.h"
41 #include "tree-into-ssa.h"
42 #include "gimple-iterator.h"
43 #include "value-prof.h"
44 #include "symbol-summary.h"
45 #include "ipa-prop.h"
46 #include "ipa-fnsummary.h"
47 #include "ipa-inline.h"
48 #include "tree-inline.h"
49 #include "auto-profile.h"
50 #include "tree-pretty-print.h"
51 #include "gimple-pretty-print.h"
52 
53 /* The following routines implements AutoFDO optimization.
54 
55    This optimization uses sampling profiles to annotate basic block counts
56    and uses heuristics to estimate branch probabilities.
57 
58    There are three phases in AutoFDO:
59 
60    Phase 1: Read profile from the profile data file.
61      The following info is read from the profile datafile:
62         * string_table: a map between function name and its index.
63         * autofdo_source_profile: a map from function_instance name to
64           function_instance. This is represented as a forest of
65           function_instances.
66         * WorkingSet: a histogram of how many instructions are covered for a
67           given percentage of total cycles. This is describing the binary
68           level information (not source level). This info is used to help
69           decide if we want aggressive optimizations that could increase
70           code footprint (e.g. loop unroll etc.)
71      A function instance is an instance of function that could either be a
72      standalone symbol, or a clone of a function that is inlined into another
73      function.
74 
75    Phase 2: Early inline + value profile transformation.
76      Early inline uses autofdo_source_profile to find if a callsite is:
77         * inlined in the profiled binary.
78         * callee body is hot in the profiling run.
79      If both condition satisfies, early inline will inline the callsite
80      regardless of the code growth.
81      Phase 2 is an iterative process. During each iteration, we also check
82      if an indirect callsite is promoted and inlined in the profiling run.
83      If yes, vpt will happen to force promote it and in the next iteration,
84      einline will inline the promoted callsite in the next iteration.
85 
86    Phase 3: Annotate control flow graph.
87      AutoFDO uses a separate pass to:
88         * Annotate basic block count
89         * Estimate branch probability
90 
91    After the above 3 phases, all profile is readily annotated on the GCC IR.
92    AutoFDO tries to reuse all FDO infrastructure as much as possible to make
93    use of the profile. E.g. it uses existing mechanism to calculate the basic
94    block/edge frequency, as well as the cgraph node/edge count.
95 */
96 
97 #define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo"
98 #define AUTO_PROFILE_VERSION 1
99 
100 namespace autofdo
101 {
102 
103 /* Intermediate edge info used when propagating AutoFDO profile information.
104    We can't edge->count() directly since it's computed from edge's probability
105    while probability is yet not decided during propagation.  */
106 #define AFDO_EINFO(e)                     ((class edge_info *) e->aux)
107 class edge_info
108 {
109 public:
edge_info()110   edge_info () : count_ (profile_count::zero ().afdo ()), annotated_ (false) {}
is_annotated()111   bool is_annotated () const { return annotated_; }
set_annotated()112   void set_annotated () { annotated_ = true; }
get_count()113   profile_count get_count () const { return count_; }
set_count(profile_count count)114   void set_count (profile_count count) { count_ = count; }
115 private:
116   profile_count count_;
117   bool annotated_;
118 };
119 
120 /* Represent a source location: (function_decl, lineno).  */
121 typedef std::pair<tree, unsigned> decl_lineno;
122 
123 /* Represent an inline stack. vector[0] is the leaf node.  */
124 typedef auto_vec<decl_lineno> inline_stack;
125 
126 /* String array that stores function names.  */
127 typedef auto_vec<char *> string_vector;
128 
129 /* Map from function name's index in string_table to target's
130    execution count.  */
131 typedef std::map<unsigned, gcov_type> icall_target_map;
132 
133 /* Set of gimple stmts. Used to track if the stmt has already been promoted
134    to direct call.  */
135 typedef std::set<gimple *> stmt_set;
136 
137 /* Represent count info of an inline stack.  */
138 class count_info
139 {
140 public:
141   /* Sampled count of the inline stack.  */
142   gcov_type count;
143 
144   /* Map from indirect call target to its sample count.  */
145   icall_target_map targets;
146 
147   /* Whether this inline stack is already used in annotation.
148 
149      Each inline stack should only be used to annotate IR once.
150      This will be enforced when instruction-level discriminator
151      is supported.  */
152   bool annotated;
153 };
154 
155 /* operator< for "const char *".  */
156 struct string_compare
157 {
operatorstring_compare158   bool operator()(const char *a, const char *b) const
159   {
160     return strcmp (a, b) < 0;
161   }
162 };
163 
164 /* Store a string array, indexed by string position in the array.  */
165 class string_table
166 {
167 public:
string_table()168   string_table ()
169   {}
170 
171   ~string_table ();
172 
173   /* For a given string, returns its index.  */
174   int get_index (const char *name) const;
175 
176   /* For a given decl, returns the index of the decl name.  */
177   int get_index_by_decl (tree decl) const;
178 
179   /* For a given index, returns the string.  */
180   const char *get_name (int index) const;
181 
182   /* Read profile, return TRUE on success.  */
183   bool read ();
184 
185 private:
186   typedef std::map<const char *, unsigned, string_compare> string_index_map;
187   string_vector vector_;
188   string_index_map map_;
189 };
190 
191 /* Profile of a function instance:
192      1. total_count of the function.
193      2. head_count (entry basic block count) of the function (only valid when
194         function is a top-level function_instance, i.e. it is the original copy
195         instead of the inlined copy).
196      3. map from source location (decl_lineno) to profile (count_info).
197      4. map from callsite to callee function_instance.  */
198 class function_instance
199 {
200 public:
201   typedef auto_vec<function_instance *> function_instance_stack;
202 
203   /* Read the profile and return a function_instance with head count as
204      HEAD_COUNT. Recursively read callsites to create nested function_instances
205      too. STACK is used to track the recursive creation process.  */
206   static function_instance *
207   read_function_instance (function_instance_stack *stack,
208                           gcov_type head_count);
209 
210   /* Recursively deallocate all callsites (nested function_instances).  */
211   ~function_instance ();
212 
213   /* Accessors.  */
214   int
name()215   name () const
216   {
217     return name_;
218   }
219   gcov_type
total_count()220   total_count () const
221   {
222     return total_count_;
223   }
224   gcov_type
head_count()225   head_count () const
226   {
227     return head_count_;
228   }
229 
230   /* Traverse callsites of the current function_instance to find one at the
231      location of LINENO and callee name represented in DECL.  */
232   function_instance *get_function_instance_by_decl (unsigned lineno,
233                                                     tree decl) const;
234 
235   /* Store the profile info for LOC in INFO. Return TRUE if profile info
236      is found.  */
237   bool get_count_info (location_t loc, count_info *info) const;
238 
239   /* Read the inlined indirect call target profile for STMT and store it in
240      MAP, return the total count for all inlined indirect calls.  */
241   gcov_type find_icall_target_map (gcall *stmt, icall_target_map *map) const;
242 
243   /* Sum of counts that is used during annotation.  */
244   gcov_type total_annotated_count () const;
245 
246   /* Mark LOC as annotated.  */
247   void mark_annotated (location_t loc);
248 
249 private:
250   /* Callsite, represented as (decl_lineno, callee_function_name_index).  */
251   typedef std::pair<unsigned, unsigned> callsite;
252 
253   /* Map from callsite to callee function_instance.  */
254   typedef std::map<callsite, function_instance *> callsite_map;
255 
function_instance(unsigned name,gcov_type head_count)256   function_instance (unsigned name, gcov_type head_count)
257       : name_ (name), total_count_ (0), head_count_ (head_count)
258   {
259   }
260 
261   /* Map from source location (decl_lineno) to profile (count_info).  */
262   typedef std::map<unsigned, count_info> position_count_map;
263 
264   /* function_instance name index in the string_table.  */
265   unsigned name_;
266 
267   /* Total sample count.  */
268   gcov_type total_count_;
269 
270   /* Entry BB's sample count.  */
271   gcov_type head_count_;
272 
273   /* Map from callsite location to callee function_instance.  */
274   callsite_map callsites;
275 
276   /* Map from source location to count_info.  */
277   position_count_map pos_counts;
278 };
279 
280 /* Profile for all functions.  */
281 class autofdo_source_profile
282 {
283 public:
284   static autofdo_source_profile *
create()285   create ()
286   {
287     autofdo_source_profile *map = new autofdo_source_profile ();
288 
289     if (map->read ())
290       return map;
291     delete map;
292     return NULL;
293   }
294 
295   ~autofdo_source_profile ();
296 
297   /* For a given DECL, returns the top-level function_instance.  */
298   function_instance *get_function_instance_by_decl (tree decl) const;
299 
300   /* Find count_info for a given gimple STMT. If found, store the count_info
301      in INFO and return true; otherwise return false.  */
302   bool get_count_info (gimple *stmt, count_info *info) const;
303 
304   /* Find total count of the callee of EDGE.  */
305   gcov_type get_callsite_total_count (struct cgraph_edge *edge) const;
306 
307   /* Update value profile INFO for STMT from the inlined indirect callsite.
308      Return true if INFO is updated.  */
309   bool update_inlined_ind_target (gcall *stmt, count_info *info);
310 
311   /* Mark LOC as annotated.  */
312   void mark_annotated (location_t loc);
313 
314 private:
315   /* Map from function_instance name index (in string_table) to
316      function_instance.  */
317   typedef std::map<unsigned, function_instance *> name_function_instance_map;
318 
autofdo_source_profile()319   autofdo_source_profile () {}
320 
321   /* Read AutoFDO profile and returns TRUE on success.  */
322   bool read ();
323 
324   /* Return the function_instance in the profile that correspond to the
325      inline STACK.  */
326   function_instance *
327   get_function_instance_by_inline_stack (const inline_stack &stack) const;
328 
329   name_function_instance_map map_;
330 };
331 
332 /* Store the strings read from the profile data file.  */
333 static string_table *afdo_string_table;
334 
335 /* Store the AutoFDO source profile.  */
336 static autofdo_source_profile *afdo_source_profile;
337 
338 /* gcov_summary structure to store the profile_info.  */
339 static gcov_summary *afdo_profile_info;
340 
341 /* Helper functions.  */
342 
343 /* Return the original name of NAME: strip the suffix that starts
344    with '.' Caller is responsible for freeing RET.  */
345 
346 static char *
get_original_name(const char * name)347 get_original_name (const char *name)
348 {
349   char *ret = xstrdup (name);
350   char *find = strchr (ret, '.');
351   if (find != NULL)
352     *find = 0;
353   return ret;
354 }
355 
356 /* Return the combined location, which is a 32bit integer in which
357    higher 16 bits stores the line offset of LOC to the start lineno
358    of DECL, The lower 16 bits stores the discriminator.  */
359 
360 static unsigned
get_combined_location(location_t loc,tree decl)361 get_combined_location (location_t loc, tree decl)
362 {
363   /* TODO: allow more bits for line and less bits for discriminator.  */
364   if (LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl) >= (1<<16))
365     warning_at (loc, OPT_Woverflow, "offset exceeds 16 bytes");
366   return ((LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl)) << 16);
367 }
368 
369 /* Return the function decl of a given lexical BLOCK.  */
370 
371 static tree
get_function_decl_from_block(tree block)372 get_function_decl_from_block (tree block)
373 {
374   if (!inlined_function_outer_scope_p (block))
375     return NULL_TREE;
376 
377   return BLOCK_ABSTRACT_ORIGIN (block);
378 }
379 
380 /* Store inline stack for STMT in STACK.  */
381 
382 static void
get_inline_stack(location_t locus,inline_stack * stack)383 get_inline_stack (location_t locus, inline_stack *stack)
384 {
385   if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
386     return;
387 
388   tree block = LOCATION_BLOCK (locus);
389   if (block && TREE_CODE (block) == BLOCK)
390     {
391       int level = 0;
392       for (block = BLOCK_SUPERCONTEXT (block);
393            block && (TREE_CODE (block) == BLOCK);
394            block = BLOCK_SUPERCONTEXT (block))
395         {
396           location_t tmp_locus = BLOCK_SOURCE_LOCATION (block);
397           if (LOCATION_LOCUS (tmp_locus) == UNKNOWN_LOCATION)
398             continue;
399 
400           tree decl = get_function_decl_from_block (block);
401           stack->safe_push (
402               std::make_pair (decl, get_combined_location (locus, decl)));
403           locus = tmp_locus;
404           level++;
405         }
406     }
407   stack->safe_push (
408       std::make_pair (current_function_decl,
409                       get_combined_location (locus, current_function_decl)));
410 }
411 
412 /* Return STMT's combined location, which is a 32bit integer in which
413    higher 16 bits stores the line offset of LOC to the start lineno
414    of DECL, The lower 16 bits stores the discriminator.  */
415 
416 static unsigned
get_relative_location_for_stmt(gimple * stmt)417 get_relative_location_for_stmt (gimple *stmt)
418 {
419   location_t locus = gimple_location (stmt);
420   if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
421     return UNKNOWN_LOCATION;
422 
423   for (tree block = gimple_block (stmt); block && (TREE_CODE (block) == BLOCK);
424        block = BLOCK_SUPERCONTEXT (block))
425     if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) != UNKNOWN_LOCATION)
426       return get_combined_location (locus,
427                                     get_function_decl_from_block (block));
428   return get_combined_location (locus, current_function_decl);
429 }
430 
431 /* Return true if BB contains indirect call.  */
432 
433 static bool
has_indirect_call(basic_block bb)434 has_indirect_call (basic_block bb)
435 {
436   gimple_stmt_iterator gsi;
437 
438   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
439     {
440       gimple *stmt = gsi_stmt (gsi);
441       if (gimple_code (stmt) == GIMPLE_CALL && !gimple_call_internal_p (stmt)
442           && (gimple_call_fn (stmt) == NULL
443               || TREE_CODE (gimple_call_fn (stmt)) != FUNCTION_DECL))
444         return true;
445     }
446   return false;
447 }
448 
449 /* Member functions for string_table.  */
450 
451 /* Deconstructor.  */
452 
~string_table()453 string_table::~string_table ()
454 {
455   for (unsigned i = 0; i < vector_.length (); i++)
456     free (vector_[i]);
457 }
458 
459 
460 /* Return the index of a given function NAME. Return -1 if NAME is not
461    found in string table.  */
462 
463 int
get_index(const char * name)464 string_table::get_index (const char *name) const
465 {
466   if (name == NULL)
467     return -1;
468   string_index_map::const_iterator iter = map_.find (name);
469   if (iter == map_.end ())
470     return -1;
471 
472   return iter->second;
473 }
474 
475 /* Return the index of a given function DECL. Return -1 if DECL is not
476    found in string table.  */
477 
478 int
get_index_by_decl(tree decl)479 string_table::get_index_by_decl (tree decl) const
480 {
481   char *name
482       = get_original_name (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
483   int ret = get_index (name);
484   free (name);
485   if (ret != -1)
486     return ret;
487   ret = get_index (lang_hooks.dwarf_name (decl, 0));
488   if (ret != -1)
489     return ret;
490   if (DECL_FROM_INLINE (decl))
491     return get_index_by_decl (DECL_ABSTRACT_ORIGIN (decl));
492 
493   return -1;
494 }
495 
496 /* Return the function name of a given INDEX.  */
497 
498 const char *
get_name(int index)499 string_table::get_name (int index) const
500 {
501   gcc_assert (index > 0 && index < (int)vector_.length ());
502   return vector_[index];
503 }
504 
505 /* Read the string table. Return TRUE if reading is successful.  */
506 
507 bool
read()508 string_table::read ()
509 {
510   if (gcov_read_unsigned () != GCOV_TAG_AFDO_FILE_NAMES)
511     return false;
512   /* Skip the length of the section.  */
513   gcov_read_unsigned ();
514   /* Read in the file name table.  */
515   unsigned string_num = gcov_read_unsigned ();
516   for (unsigned i = 0; i < string_num; i++)
517     {
518       vector_.safe_push (get_original_name (gcov_read_string ()));
519       map_[vector_.last ()] = i;
520     }
521   return true;
522 }
523 
524 /* Member functions for function_instance.  */
525 
~function_instance()526 function_instance::~function_instance ()
527 {
528   for (callsite_map::iterator iter = callsites.begin ();
529        iter != callsites.end (); ++iter)
530     delete iter->second;
531 }
532 
533 /* Traverse callsites of the current function_instance to find one at the
534    location of LINENO and callee name represented in DECL.  */
535 
536 function_instance *
get_function_instance_by_decl(unsigned lineno,tree decl)537 function_instance::get_function_instance_by_decl (unsigned lineno,
538                                                   tree decl) const
539 {
540   int func_name_idx = afdo_string_table->get_index_by_decl (decl);
541   if (func_name_idx != -1)
542     {
543       callsite_map::const_iterator ret
544           = callsites.find (std::make_pair (lineno, func_name_idx));
545       if (ret != callsites.end ())
546         return ret->second;
547     }
548   func_name_idx
549       = afdo_string_table->get_index (lang_hooks.dwarf_name (decl, 0));
550   if (func_name_idx != -1)
551     {
552       callsite_map::const_iterator ret
553           = callsites.find (std::make_pair (lineno, func_name_idx));
554       if (ret != callsites.end ())
555         return ret->second;
556     }
557   if (DECL_FROM_INLINE (decl))
558     return get_function_instance_by_decl (lineno, DECL_ABSTRACT_ORIGIN (decl));
559 
560   return NULL;
561 }
562 
563 /* Store the profile info for LOC in INFO. Return TRUE if profile info
564    is found.  */
565 
566 bool
get_count_info(location_t loc,count_info * info)567 function_instance::get_count_info (location_t loc, count_info *info) const
568 {
569   position_count_map::const_iterator iter = pos_counts.find (loc);
570   if (iter == pos_counts.end ())
571     return false;
572   *info = iter->second;
573   return true;
574 }
575 
576 /* Mark LOC as annotated.  */
577 
578 void
mark_annotated(location_t loc)579 function_instance::mark_annotated (location_t loc)
580 {
581   position_count_map::iterator iter = pos_counts.find (loc);
582   if (iter == pos_counts.end ())
583     return;
584   iter->second.annotated = true;
585 }
586 
587 /* Read the inlined indirect call target profile for STMT and store it in
588    MAP, return the total count for all inlined indirect calls.  */
589 
590 gcov_type
find_icall_target_map(gcall * stmt,icall_target_map * map)591 function_instance::find_icall_target_map (gcall *stmt,
592                                           icall_target_map *map) const
593 {
594   gcov_type ret = 0;
595   unsigned stmt_offset = get_relative_location_for_stmt (stmt);
596 
597   for (callsite_map::const_iterator iter = callsites.begin ();
598        iter != callsites.end (); ++iter)
599     {
600       unsigned callee = iter->second->name ();
601       /* Check if callsite location match the stmt.  */
602       if (iter->first.first != stmt_offset)
603         continue;
604       struct cgraph_node *node = cgraph_node::get_for_asmname (
605           get_identifier (afdo_string_table->get_name (callee)));
606       if (node == NULL)
607         continue;
608       (*map)[callee] = iter->second->total_count ();
609       ret += iter->second->total_count ();
610     }
611   return ret;
612 }
613 
614 /* Read the profile and create a function_instance with head count as
615    HEAD_COUNT. Recursively read callsites to create nested function_instances
616    too. STACK is used to track the recursive creation process.  */
617 
618 /* function instance profile format:
619 
620    ENTRY_COUNT: 8 bytes
621    NAME_INDEX: 4 bytes
622    NUM_POS_COUNTS: 4 bytes
623    NUM_CALLSITES: 4 byte
624    POS_COUNT_1:
625      POS_1_OFFSET: 4 bytes
626      NUM_TARGETS: 4 bytes
627      COUNT: 8 bytes
628      TARGET_1:
629        VALUE_PROFILE_TYPE: 4 bytes
630        TARGET_IDX: 8 bytes
631        COUNT: 8 bytes
632      TARGET_2
633      ...
634      TARGET_n
635    POS_COUNT_2
636    ...
637    POS_COUNT_N
638    CALLSITE_1:
639      CALLSITE_1_OFFSET: 4 bytes
640      FUNCTION_INSTANCE_PROFILE (nested)
641    CALLSITE_2
642    ...
643    CALLSITE_n.  */
644 
645 function_instance *
read_function_instance(function_instance_stack * stack,gcov_type head_count)646 function_instance::read_function_instance (function_instance_stack *stack,
647                                            gcov_type head_count)
648 {
649   unsigned name = gcov_read_unsigned ();
650   unsigned num_pos_counts = gcov_read_unsigned ();
651   unsigned num_callsites = gcov_read_unsigned ();
652   function_instance *s = new function_instance (name, head_count);
653   stack->safe_push (s);
654 
655   for (unsigned i = 0; i < num_pos_counts; i++)
656     {
657       unsigned offset = gcov_read_unsigned () & 0xffff0000;
658       unsigned num_targets = gcov_read_unsigned ();
659       gcov_type count = gcov_read_counter ();
660       s->pos_counts[offset].count = count;
661       for (unsigned j = 0; j < stack->length (); j++)
662         (*stack)[j]->total_count_ += count;
663       for (unsigned j = 0; j < num_targets; j++)
664         {
665           /* Only indirect call target histogram is supported now.  */
666           gcov_read_unsigned ();
667           gcov_type target_idx = gcov_read_counter ();
668           s->pos_counts[offset].targets[target_idx] = gcov_read_counter ();
669         }
670     }
671   for (unsigned i = 0; i < num_callsites; i++)
672     {
673       unsigned offset = gcov_read_unsigned ();
674       function_instance *callee_function_instance
675           = read_function_instance (stack, 0);
676       s->callsites[std::make_pair (offset, callee_function_instance->name ())]
677           = callee_function_instance;
678     }
679   stack->pop ();
680   return s;
681 }
682 
683 /* Sum of counts that is used during annotation.  */
684 
685 gcov_type
total_annotated_count()686 function_instance::total_annotated_count () const
687 {
688   gcov_type ret = 0;
689   for (callsite_map::const_iterator iter = callsites.begin ();
690        iter != callsites.end (); ++iter)
691     ret += iter->second->total_annotated_count ();
692   for (position_count_map::const_iterator iter = pos_counts.begin ();
693        iter != pos_counts.end (); ++iter)
694     if (iter->second.annotated)
695       ret += iter->second.count;
696   return ret;
697 }
698 
699 /* Member functions for autofdo_source_profile.  */
700 
~autofdo_source_profile()701 autofdo_source_profile::~autofdo_source_profile ()
702 {
703   for (name_function_instance_map::const_iterator iter = map_.begin ();
704        iter != map_.end (); ++iter)
705     delete iter->second;
706 }
707 
708 /* For a given DECL, returns the top-level function_instance.  */
709 
710 function_instance *
get_function_instance_by_decl(tree decl)711 autofdo_source_profile::get_function_instance_by_decl (tree decl) const
712 {
713   int index = afdo_string_table->get_index_by_decl (decl);
714   if (index == -1)
715     return NULL;
716   name_function_instance_map::const_iterator ret = map_.find (index);
717   return ret == map_.end () ? NULL : ret->second;
718 }
719 
720 /* Find count_info for a given gimple STMT. If found, store the count_info
721    in INFO and return true; otherwise return false.  */
722 
723 bool
get_count_info(gimple * stmt,count_info * info)724 autofdo_source_profile::get_count_info (gimple *stmt, count_info *info) const
725 {
726   if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
727     return false;
728 
729   inline_stack stack;
730   get_inline_stack (gimple_location (stmt), &stack);
731   if (stack.length () == 0)
732     return false;
733   function_instance *s = get_function_instance_by_inline_stack (stack);
734   if (s == NULL)
735     return false;
736   return s->get_count_info (stack[0].second, info);
737 }
738 
739 /* Mark LOC as annotated.  */
740 
741 void
mark_annotated(location_t loc)742 autofdo_source_profile::mark_annotated (location_t loc)
743 {
744   inline_stack stack;
745   get_inline_stack (loc, &stack);
746   if (stack.length () == 0)
747     return;
748   function_instance *s = get_function_instance_by_inline_stack (stack);
749   if (s == NULL)
750     return;
751   s->mark_annotated (stack[0].second);
752 }
753 
754 /* Update value profile INFO for STMT from the inlined indirect callsite.
755    Return true if INFO is updated.  */
756 
757 bool
update_inlined_ind_target(gcall * stmt,count_info * info)758 autofdo_source_profile::update_inlined_ind_target (gcall *stmt,
759                                                    count_info *info)
760 {
761   if (dump_file)
762     {
763       fprintf (dump_file, "Checking indirect call -> direct call ");
764       print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
765     }
766 
767   if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
768     {
769       if (dump_file)
770 	fprintf (dump_file, " good locus\n");
771       return false;
772     }
773 
774   count_info old_info;
775   get_count_info (stmt, &old_info);
776   gcov_type total = 0;
777   for (icall_target_map::const_iterator iter = old_info.targets.begin ();
778        iter != old_info.targets.end (); ++iter)
779     total += iter->second;
780 
781   /* Program behavior changed, original promoted (and inlined) target is not
782      hot any more. Will avoid promote the original target.
783 
784      To check if original promoted target is still hot, we check the total
785      count of the unpromoted targets (stored in TOTAL). If a callsite count
786      (stored in INFO) is smaller than half of the total count, the original
787      promoted target is considered not hot any more.  */
788   if (info->count < total / 2)
789     {
790       if (dump_file)
791 	fprintf (dump_file, " not hot anymore %ld < %ld",
792 		 (long)info->count,
793 		 (long)total /2);
794       return false;
795     }
796 
797   inline_stack stack;
798   get_inline_stack (gimple_location (stmt), &stack);
799   if (stack.length () == 0)
800     {
801       if (dump_file)
802 	fprintf (dump_file, " no inline stack\n");
803       return false;
804     }
805   function_instance *s = get_function_instance_by_inline_stack (stack);
806   if (s == NULL)
807     {
808       if (dump_file)
809 	fprintf (dump_file, " function not found in inline stack\n");
810       return false;
811     }
812   icall_target_map map;
813   if (s->find_icall_target_map (stmt, &map) == 0)
814     {
815       if (dump_file)
816 	fprintf (dump_file, " no target map\n");
817       return false;
818     }
819   for (icall_target_map::const_iterator iter = map.begin ();
820        iter != map.end (); ++iter)
821     info->targets[iter->first] = iter->second;
822   if (dump_file)
823     fprintf (dump_file, " looks good\n");
824   return true;
825 }
826 
827 /* Find total count of the callee of EDGE.  */
828 
829 gcov_type
get_callsite_total_count(struct cgraph_edge * edge)830 autofdo_source_profile::get_callsite_total_count (
831     struct cgraph_edge *edge) const
832 {
833   inline_stack stack;
834   stack.safe_push (std::make_pair (edge->callee->decl, 0));
835   get_inline_stack (gimple_location (edge->call_stmt), &stack);
836 
837   function_instance *s = get_function_instance_by_inline_stack (stack);
838   if (s == NULL
839       || afdo_string_table->get_index (IDENTIFIER_POINTER (
840              DECL_ASSEMBLER_NAME (edge->callee->decl))) != s->name ())
841     return 0;
842 
843   return s->total_count ();
844 }
845 
846 /* Read AutoFDO profile and returns TRUE on success.  */
847 
848 /* source profile format:
849 
850    GCOV_TAG_AFDO_FUNCTION: 4 bytes
851    LENGTH: 4 bytes
852    NUM_FUNCTIONS: 4 bytes
853    FUNCTION_INSTANCE_1
854    FUNCTION_INSTANCE_2
855    ...
856    FUNCTION_INSTANCE_N.  */
857 
858 bool
read()859 autofdo_source_profile::read ()
860 {
861   if (gcov_read_unsigned () != GCOV_TAG_AFDO_FUNCTION)
862     {
863       inform (UNKNOWN_LOCATION, "Not expected TAG.");
864       return false;
865     }
866 
867   /* Skip the length of the section.  */
868   gcov_read_unsigned ();
869 
870   /* Read in the function/callsite profile, and store it in local
871      data structure.  */
872   unsigned function_num = gcov_read_unsigned ();
873   for (unsigned i = 0; i < function_num; i++)
874     {
875       function_instance::function_instance_stack stack;
876       function_instance *s = function_instance::read_function_instance (
877           &stack, gcov_read_counter ());
878       map_[s->name ()] = s;
879     }
880   return true;
881 }
882 
883 /* Return the function_instance in the profile that correspond to the
884    inline STACK.  */
885 
886 function_instance *
get_function_instance_by_inline_stack(const inline_stack & stack)887 autofdo_source_profile::get_function_instance_by_inline_stack (
888     const inline_stack &stack) const
889 {
890   name_function_instance_map::const_iterator iter = map_.find (
891       afdo_string_table->get_index_by_decl (stack[stack.length () - 1].first));
892   if (iter == map_.end())
893     return NULL;
894   function_instance *s = iter->second;
895   for (unsigned i = stack.length() - 1; i > 0; i--)
896     {
897       s = s->get_function_instance_by_decl (
898           stack[i].second, stack[i - 1].first);
899       if (s == NULL)
900         return NULL;
901     }
902   return s;
903 }
904 
905 /* Module profile is only used by LIPO. Here we simply ignore it.  */
906 
907 static void
fake_read_autofdo_module_profile()908 fake_read_autofdo_module_profile ()
909 {
910   /* Read in the module info.  */
911   gcov_read_unsigned ();
912 
913   /* Skip the length of the section.  */
914   gcov_read_unsigned ();
915 
916   /* Read in the file name table.  */
917   unsigned total_module_num = gcov_read_unsigned ();
918   gcc_assert (total_module_num == 0);
919 }
920 
921 /* Read data from profile data file.  */
922 
923 static void
read_profile(void)924 read_profile (void)
925 {
926   if (gcov_open (auto_profile_file, 1) == 0)
927     {
928       error ("cannot open profile file %s", auto_profile_file);
929       return;
930     }
931 
932   if (gcov_read_unsigned () != GCOV_DATA_MAGIC)
933     {
934       error ("AutoFDO profile magic number does not match");
935       return;
936     }
937 
938   /* Skip the version number.  */
939   unsigned version = gcov_read_unsigned ();
940   if (version != AUTO_PROFILE_VERSION)
941     {
942       error ("AutoFDO profile version %u does match %u",
943 	     version, AUTO_PROFILE_VERSION);
944       return;
945     }
946 
947   /* Skip the empty integer.  */
948   gcov_read_unsigned ();
949 
950   /* string_table.  */
951   afdo_string_table = new string_table ();
952   if (!afdo_string_table->read())
953     {
954       error ("cannot read string table from %s", auto_profile_file);
955       return;
956     }
957 
958   /* autofdo_source_profile.  */
959   afdo_source_profile = autofdo_source_profile::create ();
960   if (afdo_source_profile == NULL)
961     {
962       error ("cannot read function profile from %s", auto_profile_file);
963       return;
964     }
965 
966   /* autofdo_module_profile.  */
967   fake_read_autofdo_module_profile ();
968 }
969 
970 /* From AutoFDO profiles, find values inside STMT for that we want to measure
971    histograms for indirect-call optimization.
972 
973    This function is actually served for 2 purposes:
974      * before annotation, we need to mark histogram, promote and inline
975      * after annotation, we just need to mark, and let follow-up logic to
976        decide if it needs to promote and inline.  */
977 
978 static void
afdo_indirect_call(gimple_stmt_iterator * gsi,const icall_target_map & map,bool transform)979 afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map,
980                     bool transform)
981 {
982   gimple *gs = gsi_stmt (*gsi);
983   tree callee;
984 
985   if (map.size () == 0)
986     return;
987   gcall *stmt = dyn_cast <gcall *> (gs);
988   if (!stmt
989       || gimple_call_internal_p (stmt)
990       || gimple_call_fndecl (stmt) != NULL_TREE)
991     return;
992 
993   gcov_type total = 0;
994   icall_target_map::const_iterator max_iter = map.end ();
995 
996   for (icall_target_map::const_iterator iter = map.begin ();
997        iter != map.end (); ++iter)
998     {
999       total += iter->second;
1000       if (max_iter == map.end () || max_iter->second < iter->second)
1001         max_iter = iter;
1002     }
1003   struct cgraph_node *direct_call = cgraph_node::get_for_asmname (
1004       get_identifier (afdo_string_table->get_name (max_iter->first)));
1005   if (direct_call == NULL || !direct_call->profile_id)
1006     return;
1007 
1008   callee = gimple_call_fn (stmt);
1009 
1010   histogram_value hist = gimple_alloc_histogram_value (
1011       cfun, HIST_TYPE_INDIR_CALL, stmt, callee);
1012   hist->n_counters = 3;
1013   hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters);
1014   gimple_add_histogram_value (cfun, stmt, hist);
1015 
1016   hist->hvalue.counters[0] = direct_call->profile_id;
1017   hist->hvalue.counters[1] = max_iter->second;
1018   hist->hvalue.counters[2] = total;
1019 
1020   if (!transform)
1021     return;
1022 
1023   struct cgraph_edge *indirect_edge
1024       = cgraph_node::get (current_function_decl)->get_edge (stmt);
1025 
1026   if (dump_file)
1027     {
1028       fprintf (dump_file, "Indirect call -> direct call ");
1029       print_generic_expr (dump_file, callee, TDF_SLIM);
1030       fprintf (dump_file, " => ");
1031       print_generic_expr (dump_file, direct_call->decl, TDF_SLIM);
1032     }
1033 
1034   if (direct_call == NULL)
1035     {
1036       if (dump_file)
1037         fprintf (dump_file, " not transforming\n");
1038       return;
1039     }
1040   if (DECL_STRUCT_FUNCTION (direct_call->decl) == NULL)
1041     {
1042       if (dump_file)
1043         fprintf (dump_file, " no declaration\n");
1044       return;
1045     }
1046 
1047   if (dump_file)
1048     {
1049       fprintf (dump_file, " transformation on insn ");
1050       print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1051       fprintf (dump_file, "\n");
1052     }
1053 
1054   /* FIXME: Count should be initialized.  */
1055   struct cgraph_edge *new_edge
1056       = indirect_edge->make_speculative (direct_call,
1057 					 profile_count::uninitialized ());
1058   cgraph_edge::redirect_call_stmt_to_callee (new_edge);
1059   gimple_remove_histogram_value (cfun, stmt, hist);
1060   inline_call (new_edge, true, NULL, NULL, false);
1061 }
1062 
1063 /* From AutoFDO profiles, find values inside STMT for that we want to measure
1064    histograms and adds them to list VALUES.  */
1065 
1066 static void
afdo_vpt(gimple_stmt_iterator * gsi,const icall_target_map & map,bool transform)1067 afdo_vpt (gimple_stmt_iterator *gsi, const icall_target_map &map,
1068           bool transform)
1069 {
1070   afdo_indirect_call (gsi, map, transform);
1071 }
1072 
1073 typedef std::set<basic_block> bb_set;
1074 typedef std::set<edge> edge_set;
1075 
1076 static bool
is_bb_annotated(const basic_block bb,const bb_set & annotated)1077 is_bb_annotated (const basic_block bb, const bb_set &annotated)
1078 {
1079   return annotated.find (bb) != annotated.end ();
1080 }
1081 
1082 static void
set_bb_annotated(basic_block bb,bb_set * annotated)1083 set_bb_annotated (basic_block bb, bb_set *annotated)
1084 {
1085   annotated->insert (bb);
1086 }
1087 
1088 /* For a given BB, set its execution count. Attach value profile if a stmt
1089    is not in PROMOTED, because we only want to promote an indirect call once.
1090    Return TRUE if BB is annotated.  */
1091 
1092 static bool
afdo_set_bb_count(basic_block bb,const stmt_set & promoted)1093 afdo_set_bb_count (basic_block bb, const stmt_set &promoted)
1094 {
1095   gimple_stmt_iterator gsi;
1096   edge e;
1097   edge_iterator ei;
1098   gcov_type max_count = 0;
1099   bool has_annotated = false;
1100 
1101   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1102     {
1103       count_info info;
1104       gimple *stmt = gsi_stmt (gsi);
1105       if (gimple_clobber_p (stmt) || is_gimple_debug (stmt))
1106         continue;
1107       if (afdo_source_profile->get_count_info (stmt, &info))
1108         {
1109           if (info.count > max_count)
1110             max_count = info.count;
1111           has_annotated = true;
1112           if (info.targets.size () > 0
1113               && promoted.find (stmt) == promoted.end ())
1114             afdo_vpt (&gsi, info.targets, false);
1115         }
1116     }
1117 
1118   if (!has_annotated)
1119     return false;
1120 
1121   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1122     afdo_source_profile->mark_annotated (gimple_location (gsi_stmt (gsi)));
1123   for (gphi_iterator gpi = gsi_start_phis (bb);
1124        !gsi_end_p (gpi);
1125        gsi_next (&gpi))
1126     {
1127       gphi *phi = gpi.phi ();
1128       size_t i;
1129       for (i = 0; i < gimple_phi_num_args (phi); i++)
1130         afdo_source_profile->mark_annotated (gimple_phi_arg_location (phi, i));
1131     }
1132   FOR_EACH_EDGE (e, ei, bb->succs)
1133   afdo_source_profile->mark_annotated (e->goto_locus);
1134 
1135   bb->count = profile_count::from_gcov_type (max_count).afdo ();
1136   return true;
1137 }
1138 
1139 /* BB1 and BB2 are in an equivalent class iff:
1140    1. BB1 dominates BB2.
1141    2. BB2 post-dominates BB1.
1142    3. BB1 and BB2 are in the same loop nest.
1143    This function finds the equivalent class for each basic block, and
1144    stores a pointer to the first BB in its equivalent class. Meanwhile,
1145    set bb counts for the same equivalent class to be idenical. Update
1146    ANNOTATED_BB for the first BB in its equivalent class.  */
1147 
1148 static void
afdo_find_equiv_class(bb_set * annotated_bb)1149 afdo_find_equiv_class (bb_set *annotated_bb)
1150 {
1151   basic_block bb;
1152 
1153   FOR_ALL_BB_FN (bb, cfun)
1154   bb->aux = NULL;
1155 
1156   FOR_ALL_BB_FN (bb, cfun)
1157   {
1158     vec<basic_block> dom_bbs;
1159     basic_block bb1;
1160     int i;
1161 
1162     if (bb->aux != NULL)
1163       continue;
1164     bb->aux = bb;
1165     dom_bbs = get_dominated_by (CDI_DOMINATORS, bb);
1166     FOR_EACH_VEC_ELT (dom_bbs, i, bb1)
1167       if (bb1->aux == NULL && dominated_by_p (CDI_POST_DOMINATORS, bb, bb1)
1168 	  && bb1->loop_father == bb->loop_father)
1169 	{
1170 	  bb1->aux = bb;
1171 	  if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
1172 	    {
1173 	      bb->count = bb1->count;
1174 	      set_bb_annotated (bb, annotated_bb);
1175 	    }
1176 	}
1177     dom_bbs = get_dominated_by (CDI_POST_DOMINATORS, bb);
1178     FOR_EACH_VEC_ELT (dom_bbs, i, bb1)
1179       if (bb1->aux == NULL && dominated_by_p (CDI_DOMINATORS, bb, bb1)
1180 	  && bb1->loop_father == bb->loop_father)
1181 	{
1182 	  bb1->aux = bb;
1183 	  if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
1184 	    {
1185 	      bb->count = bb1->count;
1186 	      set_bb_annotated (bb, annotated_bb);
1187 	    }
1188 	}
1189   }
1190 }
1191 
1192 /* If a basic block's count is known, and only one of its in/out edges' count
1193    is unknown, its count can be calculated. Meanwhile, if all of the in/out
1194    edges' counts are known, then the basic block's unknown count can also be
1195    calculated.
1196    IS_SUCC is true if out edges of a basic blocks are examined.
1197    Update ANNOTATED_BB accordingly.
1198    Return TRUE if any basic block/edge count is changed.  */
1199 
1200 static bool
afdo_propagate_edge(bool is_succ,bb_set * annotated_bb)1201 afdo_propagate_edge (bool is_succ, bb_set *annotated_bb)
1202 {
1203   basic_block bb;
1204   bool changed = false;
1205 
1206   FOR_EACH_BB_FN (bb, cfun)
1207   {
1208     edge e, unknown_edge = NULL;
1209     edge_iterator ei;
1210     int num_unknown_edge = 0;
1211     profile_count total_known_count = profile_count::zero ().afdo ();
1212 
1213     FOR_EACH_EDGE (e, ei, is_succ ? bb->succs : bb->preds)
1214       {
1215 	gcc_assert (AFDO_EINFO (e) != NULL);
1216 	if (! AFDO_EINFO (e)->is_annotated ())
1217 	  num_unknown_edge++, unknown_edge = e;
1218 	else
1219 	  total_known_count += AFDO_EINFO (e)->get_count ();
1220       }
1221 
1222     /* Be careful not to annotate block with no successor in special cases.  */
1223     if (num_unknown_edge == 0 && total_known_count > bb->count)
1224       {
1225 	bb->count = total_known_count;
1226 	if (!is_bb_annotated (bb, *annotated_bb))
1227 	  set_bb_annotated (bb, annotated_bb);
1228 	changed = true;
1229       }
1230     else if (num_unknown_edge == 1 && is_bb_annotated (bb, *annotated_bb))
1231       {
1232 	if (bb->count > total_known_count)
1233 	  AFDO_EINFO (unknown_edge)->set_count (bb->count - total_known_count);
1234 	else
1235 	  AFDO_EINFO (unknown_edge)->set_count (profile_count::zero().afdo ());
1236 	AFDO_EINFO (unknown_edge)->set_annotated ();
1237 	changed = true;
1238       }
1239   }
1240   return changed;
1241 }
1242 
1243 /* Special propagation for circuit expressions. Because GCC translates
1244    control flow into data flow for circuit expressions. E.g.
1245    BB1:
1246    if (a && b)
1247      BB2
1248    else
1249      BB3
1250 
1251    will be translated into:
1252 
1253    BB1:
1254      if (a)
1255        goto BB.t1
1256      else
1257        goto BB.t3
1258    BB.t1:
1259      if (b)
1260        goto BB.t2
1261      else
1262        goto BB.t3
1263    BB.t2:
1264      goto BB.t3
1265    BB.t3:
1266      tmp = PHI (0 (BB1), 0 (BB.t1), 1 (BB.t2)
1267      if (tmp)
1268        goto BB2
1269      else
1270        goto BB3
1271 
1272    In this case, we need to propagate through PHI to determine the edge
1273    count of BB1->BB.t1, BB.t1->BB.t2.  */
1274 
1275 static void
afdo_propagate_circuit(const bb_set & annotated_bb)1276 afdo_propagate_circuit (const bb_set &annotated_bb)
1277 {
1278   basic_block bb;
1279   FOR_ALL_BB_FN (bb, cfun)
1280   {
1281     gimple *def_stmt;
1282     tree cmp_rhs, cmp_lhs;
1283     gimple *cmp_stmt = last_stmt (bb);
1284     edge e;
1285     edge_iterator ei;
1286 
1287     if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
1288       continue;
1289     cmp_rhs = gimple_cond_rhs (cmp_stmt);
1290     cmp_lhs = gimple_cond_lhs (cmp_stmt);
1291     if (!TREE_CONSTANT (cmp_rhs)
1292         || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1293       continue;
1294     if (TREE_CODE (cmp_lhs) != SSA_NAME)
1295       continue;
1296     if (!is_bb_annotated (bb, annotated_bb))
1297       continue;
1298     def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1299     while (def_stmt && gimple_code (def_stmt) == GIMPLE_ASSIGN
1300            && gimple_assign_single_p (def_stmt)
1301            && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
1302       def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt));
1303     if (!def_stmt)
1304       continue;
1305     gphi *phi_stmt = dyn_cast <gphi *> (def_stmt);
1306     if (!phi_stmt)
1307       continue;
1308     FOR_EACH_EDGE (e, ei, bb->succs)
1309     {
1310       unsigned i, total = 0;
1311       edge only_one;
1312       bool check_value_one = (((integer_onep (cmp_rhs))
1313                                ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1314                               ^ ((e->flags & EDGE_TRUE_VALUE) != 0));
1315       if (! AFDO_EINFO (e)->is_annotated ())
1316         continue;
1317       for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1318         {
1319           tree val = gimple_phi_arg_def (phi_stmt, i);
1320           edge ep = gimple_phi_arg_edge (phi_stmt, i);
1321 
1322           if (!TREE_CONSTANT (val)
1323               || !(integer_zerop (val) || integer_onep (val)))
1324             continue;
1325           if (check_value_one ^ integer_onep (val))
1326             continue;
1327           total++;
1328           only_one = ep;
1329           if (! (AFDO_EINFO (e)->get_count ()).nonzero_p ()
1330 	      && ! AFDO_EINFO (ep)->is_annotated ())
1331 	    {
1332 	      AFDO_EINFO (ep)->set_count (profile_count::zero ().afdo ());
1333 	      AFDO_EINFO (ep)->set_annotated ();
1334 	    }
1335 	}
1336       if (total == 1 && ! AFDO_EINFO (only_one)->is_annotated ())
1337 	{
1338 	  AFDO_EINFO (only_one)->set_count (AFDO_EINFO (e)->get_count ());
1339 	  AFDO_EINFO (only_one)->set_annotated ();
1340 	}
1341     }
1342   }
1343 }
1344 
1345 /* Propagate the basic block count and edge count on the control flow
1346    graph. We do the propagation iteratively until stablize.  */
1347 
1348 static void
afdo_propagate(bb_set * annotated_bb)1349 afdo_propagate (bb_set *annotated_bb)
1350 {
1351   basic_block bb;
1352   bool changed = true;
1353   int i = 0;
1354 
1355   FOR_ALL_BB_FN (bb, cfun)
1356   {
1357     bb->count = ((basic_block)bb->aux)->count;
1358     if (is_bb_annotated ((basic_block)bb->aux, *annotated_bb))
1359       set_bb_annotated (bb, annotated_bb);
1360   }
1361 
1362   while (changed && i++ < 10)
1363     {
1364       changed = false;
1365 
1366       if (afdo_propagate_edge (true, annotated_bb))
1367         changed = true;
1368       if (afdo_propagate_edge (false, annotated_bb))
1369         changed = true;
1370       afdo_propagate_circuit (*annotated_bb);
1371     }
1372 }
1373 
1374 /* Propagate counts on control flow graph and calculate branch
1375    probabilities.  */
1376 
1377 static void
afdo_calculate_branch_prob(bb_set * annotated_bb)1378 afdo_calculate_branch_prob (bb_set *annotated_bb)
1379 {
1380   edge e;
1381   edge_iterator ei;
1382   basic_block bb;
1383 
1384   calculate_dominance_info (CDI_POST_DOMINATORS);
1385   calculate_dominance_info (CDI_DOMINATORS);
1386   loop_optimizer_init (0);
1387 
1388   FOR_ALL_BB_FN (bb, cfun)
1389     {
1390       gcc_assert (bb->aux == NULL);
1391       FOR_EACH_EDGE (e, ei, bb->succs)
1392 	{
1393 	  gcc_assert (e->aux == NULL);
1394 	  e->aux = new edge_info ();
1395 	}
1396     }
1397 
1398   afdo_find_equiv_class (annotated_bb);
1399   afdo_propagate (annotated_bb);
1400 
1401   FOR_EACH_BB_FN (bb, cfun)
1402   {
1403     int num_unknown_succ = 0;
1404     profile_count total_count = profile_count::zero ().afdo ();
1405 
1406     FOR_EACH_EDGE (e, ei, bb->succs)
1407     {
1408       gcc_assert (AFDO_EINFO (e) != NULL);
1409       if (! AFDO_EINFO (e)->is_annotated ())
1410         num_unknown_succ++;
1411       else
1412         total_count += AFDO_EINFO (e)->get_count ();
1413     }
1414     if (num_unknown_succ == 0 && total_count > profile_count::zero ())
1415       {
1416 	FOR_EACH_EDGE (e, ei, bb->succs)
1417 	  e->probability
1418 	    = AFDO_EINFO (e)->get_count ().probability_in (total_count);
1419       }
1420   }
1421   FOR_ALL_BB_FN (bb, cfun)
1422     {
1423       bb->aux = NULL;
1424       FOR_EACH_EDGE (e, ei, bb->succs)
1425 	if (AFDO_EINFO (e) != NULL)
1426 	  {
1427 	    delete AFDO_EINFO (e);
1428 	    e->aux = NULL;
1429 	  }
1430     }
1431 
1432   loop_optimizer_finalize ();
1433   free_dominance_info (CDI_DOMINATORS);
1434   free_dominance_info (CDI_POST_DOMINATORS);
1435 }
1436 
1437 /* Perform value profile transformation using AutoFDO profile. Add the
1438    promoted stmts to PROMOTED_STMTS. Return TRUE if there is any
1439    indirect call promoted.  */
1440 
1441 static bool
afdo_vpt_for_early_inline(stmt_set * promoted_stmts)1442 afdo_vpt_for_early_inline (stmt_set *promoted_stmts)
1443 {
1444   basic_block bb;
1445   if (afdo_source_profile->get_function_instance_by_decl (
1446           current_function_decl) == NULL)
1447     return false;
1448 
1449   compute_fn_summary (cgraph_node::get (current_function_decl), true);
1450 
1451   bool has_vpt = false;
1452   FOR_EACH_BB_FN (bb, cfun)
1453   {
1454     if (!has_indirect_call (bb))
1455       continue;
1456     gimple_stmt_iterator gsi;
1457 
1458     gcov_type bb_count = 0;
1459     for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1460       {
1461         count_info info;
1462 	gimple *stmt = gsi_stmt (gsi);
1463         if (afdo_source_profile->get_count_info (stmt, &info))
1464           bb_count = MAX (bb_count, info.count);
1465       }
1466 
1467     for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1468       {
1469         gcall *stmt = dyn_cast <gcall *> (gsi_stmt (gsi));
1470         /* IC_promotion and early_inline_2 is done in multiple iterations.
1471            No need to promoted the stmt if its in promoted_stmts (means
1472            it is already been promoted in the previous iterations).  */
1473         if ((!stmt) || gimple_call_fn (stmt) == NULL
1474             || TREE_CODE (gimple_call_fn (stmt)) == FUNCTION_DECL
1475             || promoted_stmts->find (stmt) != promoted_stmts->end ())
1476           continue;
1477 
1478         count_info info;
1479         afdo_source_profile->get_count_info (stmt, &info);
1480         info.count = bb_count;
1481         if (afdo_source_profile->update_inlined_ind_target (stmt, &info))
1482           {
1483             /* Promote the indirect call and update the promoted_stmts.  */
1484             promoted_stmts->insert (stmt);
1485             afdo_vpt (&gsi, info.targets, true);
1486             has_vpt = true;
1487           }
1488       }
1489   }
1490 
1491   if (has_vpt)
1492     {
1493       unsigned todo = optimize_inline_calls (current_function_decl);
1494       if (todo & TODO_update_ssa_any)
1495        update_ssa (TODO_update_ssa);
1496       return true;
1497     }
1498 
1499   return false;
1500 }
1501 
1502 /* Annotate auto profile to the control flow graph. Do not annotate value
1503    profile for stmts in PROMOTED_STMTS.  */
1504 
1505 static void
afdo_annotate_cfg(const stmt_set & promoted_stmts)1506 afdo_annotate_cfg (const stmt_set &promoted_stmts)
1507 {
1508   basic_block bb;
1509   bb_set annotated_bb;
1510   const function_instance *s
1511       = afdo_source_profile->get_function_instance_by_decl (
1512           current_function_decl);
1513 
1514   if (s == NULL)
1515     return;
1516   cgraph_node::get (current_function_decl)->count
1517      = profile_count::from_gcov_type (s->head_count ()).afdo ();
1518   ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1519      = profile_count::from_gcov_type (s->head_count ()).afdo ();
1520   EXIT_BLOCK_PTR_FOR_FN (cfun)->count = profile_count::zero ().afdo ();
1521   profile_count max_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1522 
1523   FOR_EACH_BB_FN (bb, cfun)
1524     {
1525       /* As autoFDO uses sampling approach, we have to assume that all
1526 	 counters are zero when not seen by autoFDO.  */
1527       bb->count = profile_count::zero ().afdo ();
1528       if (afdo_set_bb_count (bb, promoted_stmts))
1529 	set_bb_annotated (bb, &annotated_bb);
1530       if (bb->count > max_count)
1531 	max_count = bb->count;
1532     }
1533   if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1534       > ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count)
1535     {
1536       ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count
1537           = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1538       set_bb_annotated (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb, &annotated_bb);
1539     }
1540   if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1541       > EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count)
1542     {
1543       EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count
1544           = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1545       set_bb_annotated (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb, &annotated_bb);
1546     }
1547   afdo_source_profile->mark_annotated (
1548       DECL_SOURCE_LOCATION (current_function_decl));
1549   afdo_source_profile->mark_annotated (cfun->function_start_locus);
1550   afdo_source_profile->mark_annotated (cfun->function_end_locus);
1551   if (max_count > profile_count::zero ())
1552     {
1553       /* Calculate, propagate count and probability information on CFG.  */
1554       afdo_calculate_branch_prob (&annotated_bb);
1555     }
1556   update_max_bb_count ();
1557   profile_status_for_fn (cfun) = PROFILE_READ;
1558   if (flag_value_profile_transformations)
1559     {
1560       gimple_value_profile_transformations ();
1561       free_dominance_info (CDI_DOMINATORS);
1562       free_dominance_info (CDI_POST_DOMINATORS);
1563       update_ssa (TODO_update_ssa);
1564     }
1565 }
1566 
1567 /* Wrapper function to invoke early inliner.  */
1568 
1569 static void
early_inline()1570 early_inline ()
1571 {
1572   compute_fn_summary (cgraph_node::get (current_function_decl), true);
1573   unsigned todo = early_inliner (cfun);
1574   if (todo & TODO_update_ssa_any)
1575     update_ssa (TODO_update_ssa);
1576 }
1577 
1578 /* Use AutoFDO profile to annoate the control flow graph.
1579    Return the todo flag.  */
1580 
1581 static unsigned int
auto_profile(void)1582 auto_profile (void)
1583 {
1584   struct cgraph_node *node;
1585 
1586   if (symtab->state == FINISHED)
1587     return 0;
1588 
1589   init_node_map (true);
1590   profile_info = autofdo::afdo_profile_info;
1591 
1592   FOR_EACH_FUNCTION (node)
1593   {
1594     if (!gimple_has_body_p (node->decl))
1595       continue;
1596 
1597     /* Don't profile functions produced for builtin stuff.  */
1598     if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
1599       continue;
1600 
1601     push_cfun (DECL_STRUCT_FUNCTION (node->decl));
1602 
1603     /* First do indirect call promotion and early inline to make the
1604        IR match the profiled binary before actual annotation.
1605 
1606        This is needed because an indirect call might have been promoted
1607        and inlined in the profiled binary. If we do not promote and
1608        inline these indirect calls before annotation, the profile for
1609        these promoted functions will be lost.
1610 
1611        e.g. foo() --indirect_call--> bar()
1612        In profiled binary, the callsite is promoted and inlined, making
1613        the profile look like:
1614 
1615        foo: {
1616          loc_foo_1: count_1
1617          bar@loc_foo_2: {
1618            loc_bar_1: count_2
1619            loc_bar_2: count_3
1620          }
1621        }
1622 
1623        Before AutoFDO pass, loc_foo_2 is not promoted thus not inlined.
1624        If we perform annotation on it, the profile inside bar@loc_foo2
1625        will be wasted.
1626 
1627        To avoid this, we promote loc_foo_2 and inline the promoted bar
1628        function before annotation, so the profile inside bar@loc_foo2
1629        will be useful.  */
1630     autofdo::stmt_set promoted_stmts;
1631     for (int i = 0; i < opt_for_fn (node->decl,
1632 				    param_early_inliner_max_iterations); i++)
1633       {
1634         if (!flag_value_profile_transformations
1635             || !autofdo::afdo_vpt_for_early_inline (&promoted_stmts))
1636           break;
1637         early_inline ();
1638       }
1639 
1640     early_inline ();
1641     autofdo::afdo_annotate_cfg (promoted_stmts);
1642     compute_function_frequency ();
1643 
1644     /* Local pure-const may imply need to fixup the cfg.  */
1645     if (execute_fixup_cfg () & TODO_cleanup_cfg)
1646       cleanup_tree_cfg ();
1647 
1648     free_dominance_info (CDI_DOMINATORS);
1649     free_dominance_info (CDI_POST_DOMINATORS);
1650     cgraph_edge::rebuild_edges ();
1651     compute_fn_summary (cgraph_node::get (current_function_decl), true);
1652     pop_cfun ();
1653   }
1654 
1655   return TODO_rebuild_cgraph_edges;
1656 }
1657 } /* namespace autofdo.  */
1658 
1659 /* Read the profile from the profile data file.  */
1660 
1661 void
read_autofdo_file(void)1662 read_autofdo_file (void)
1663 {
1664   if (auto_profile_file == NULL)
1665     auto_profile_file = DEFAULT_AUTO_PROFILE_FILE;
1666 
1667   autofdo::afdo_profile_info = XNEW (gcov_summary);
1668   autofdo::afdo_profile_info->runs = 1;
1669   autofdo::afdo_profile_info->sum_max = 0;
1670 
1671   /* Read the profile from the profile file.  */
1672   autofdo::read_profile ();
1673 }
1674 
1675 /* Free the resources.  */
1676 
1677 void
end_auto_profile(void)1678 end_auto_profile (void)
1679 {
1680   delete autofdo::afdo_source_profile;
1681   delete autofdo::afdo_string_table;
1682   profile_info = NULL;
1683 }
1684 
1685 /* Returns TRUE if EDGE is hot enough to be inlined early.  */
1686 
1687 bool
afdo_callsite_hot_enough_for_early_inline(struct cgraph_edge * edge)1688 afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge)
1689 {
1690   gcov_type count
1691       = autofdo::afdo_source_profile->get_callsite_total_count (edge);
1692 
1693   if (count > 0)
1694     {
1695       bool is_hot;
1696       profile_count pcount = profile_count::from_gcov_type (count).afdo ();
1697       gcov_summary *saved_profile_info = profile_info;
1698       /* At early inline stage, profile_info is not set yet. We need to
1699          temporarily set it to afdo_profile_info to calculate hotness.  */
1700       profile_info = autofdo::afdo_profile_info;
1701       is_hot = maybe_hot_count_p (NULL, pcount);
1702       profile_info = saved_profile_info;
1703       return is_hot;
1704     }
1705 
1706   return false;
1707 }
1708 
1709 namespace
1710 {
1711 
1712 const pass_data pass_data_ipa_auto_profile = {
1713   SIMPLE_IPA_PASS, "afdo", /* name */
1714   OPTGROUP_NONE,           /* optinfo_flags */
1715   TV_IPA_AUTOFDO,          /* tv_id */
1716   0,                       /* properties_required */
1717   0,                       /* properties_provided */
1718   0,                       /* properties_destroyed */
1719   0,                       /* todo_flags_start */
1720   0,                       /* todo_flags_finish */
1721 };
1722 
1723 class pass_ipa_auto_profile : public simple_ipa_opt_pass
1724 {
1725 public:
pass_ipa_auto_profile(gcc::context * ctxt)1726   pass_ipa_auto_profile (gcc::context *ctxt)
1727       : simple_ipa_opt_pass (pass_data_ipa_auto_profile, ctxt)
1728   {
1729   }
1730 
1731   /* opt_pass methods: */
1732   virtual bool
gate(function *)1733   gate (function *)
1734   {
1735     return flag_auto_profile;
1736   }
1737   virtual unsigned int
execute(function *)1738   execute (function *)
1739   {
1740     return autofdo::auto_profile ();
1741   }
1742 }; // class pass_ipa_auto_profile
1743 
1744 } // anon namespace
1745 
1746 simple_ipa_opt_pass *
make_pass_ipa_auto_profile(gcc::context * ctxt)1747 make_pass_ipa_auto_profile (gcc::context *ctxt)
1748 {
1749   return new pass_ipa_auto_profile (ctxt);
1750 }
1751