1 /* Read and annotate call graph profile from the auto profile data file.
2    Copyright (C) 2014-2016 Free Software Foundation, Inc.
3    Contributed by Dehao Chen (dehao@google.com)
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #define INCLUDE_MAP
23 #define INCLUDE_SET
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "predict.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "ssa.h"
33 #include "cgraph.h"
34 #include "gcov-io.h"
35 #include "diagnostic-core.h"
36 #include "profile.h"
37 #include "langhooks.h"
38 #include "cfgloop.h"
39 #include "tree-cfg.h"
40 #include "tree-cfgcleanup.h"
41 #include "tree-into-ssa.h"
42 #include "gimple-iterator.h"
43 #include "value-prof.h"
44 #include "params.h"
45 #include "symbol-summary.h"
46 #include "ipa-prop.h"
47 #include "ipa-inline.h"
48 #include "tree-inline.h"
49 #include "auto-profile.h"
50 
51 /* The following routines implements AutoFDO optimization.
52 
53    This optimization uses sampling profiles to annotate basic block counts
54    and uses heuristics to estimate branch probabilities.
55 
56    There are three phases in AutoFDO:
57 
58    Phase 1: Read profile from the profile data file.
59      The following info is read from the profile datafile:
60         * string_table: a map between function name and its index.
61         * autofdo_source_profile: a map from function_instance name to
62           function_instance. This is represented as a forest of
63           function_instances.
64         * WorkingSet: a histogram of how many instructions are covered for a
65           given percentage of total cycles. This is describing the binary
66           level information (not source level). This info is used to help
67           decide if we want aggressive optimizations that could increase
68           code footprint (e.g. loop unroll etc.)
69      A function instance is an instance of function that could either be a
70      standalone symbol, or a clone of a function that is inlined into another
71      function.
72 
73    Phase 2: Early inline + value profile transformation.
74      Early inline uses autofdo_source_profile to find if a callsite is:
75         * inlined in the profiled binary.
76         * callee body is hot in the profiling run.
77      If both condition satisfies, early inline will inline the callsite
78      regardless of the code growth.
79      Phase 2 is an iterative process. During each iteration, we also check
80      if an indirect callsite is promoted and inlined in the profiling run.
81      If yes, vpt will happen to force promote it and in the next iteration,
82      einline will inline the promoted callsite in the next iteration.
83 
84    Phase 3: Annotate control flow graph.
85      AutoFDO uses a separate pass to:
86         * Annotate basic block count
87         * Estimate branch probability
88 
89    After the above 3 phases, all profile is readily annotated on the GCC IR.
90    AutoFDO tries to reuse all FDO infrastructure as much as possible to make
91    use of the profile. E.g. it uses existing mechanism to calculate the basic
92    block/edge frequency, as well as the cgraph node/edge count.
93 */
94 
95 #define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo"
96 #define AUTO_PROFILE_VERSION 1
97 
98 namespace autofdo
99 {
100 
101 /* Represent a source location: (function_decl, lineno).  */
102 typedef std::pair<tree, unsigned> decl_lineno;
103 
104 /* Represent an inline stack. vector[0] is the leaf node.  */
105 typedef auto_vec<decl_lineno> inline_stack;
106 
107 /* String array that stores function names.  */
108 typedef auto_vec<char *> string_vector;
109 
110 /* Map from function name's index in string_table to target's
111    execution count.  */
112 typedef std::map<unsigned, gcov_type> icall_target_map;
113 
114 /* Set of gimple stmts. Used to track if the stmt has already been promoted
115    to direct call.  */
116 typedef std::set<gimple *> stmt_set;
117 
118 /* Represent count info of an inline stack.  */
119 struct count_info
120 {
121   /* Sampled count of the inline stack.  */
122   gcov_type count;
123 
124   /* Map from indirect call target to its sample count.  */
125   icall_target_map targets;
126 
127   /* Whether this inline stack is already used in annotation.
128 
129      Each inline stack should only be used to annotate IR once.
130      This will be enforced when instruction-level discriminator
131      is supported.  */
132   bool annotated;
133 };
134 
135 /* operator< for "const char *".  */
136 struct string_compare
137 {
operatorstring_compare138   bool operator()(const char *a, const char *b) const
139   {
140     return strcmp (a, b) < 0;
141   }
142 };
143 
144 /* Store a string array, indexed by string position in the array.  */
145 class string_table
146 {
147 public:
string_table()148   string_table ()
149   {}
150 
151   ~string_table ();
152 
153   /* For a given string, returns its index.  */
154   int get_index (const char *name) const;
155 
156   /* For a given decl, returns the index of the decl name.  */
157   int get_index_by_decl (tree decl) const;
158 
159   /* For a given index, returns the string.  */
160   const char *get_name (int index) const;
161 
162   /* Read profile, return TRUE on success.  */
163   bool read ();
164 
165 private:
166   typedef std::map<const char *, unsigned, string_compare> string_index_map;
167   string_vector vector_;
168   string_index_map map_;
169 };
170 
171 /* Profile of a function instance:
172      1. total_count of the function.
173      2. head_count (entry basic block count) of the function (only valid when
174         function is a top-level function_instance, i.e. it is the original copy
175         instead of the inlined copy).
176      3. map from source location (decl_lineno) to profile (count_info).
177      4. map from callsite to callee function_instance.  */
178 class function_instance
179 {
180 public:
181   typedef auto_vec<function_instance *> function_instance_stack;
182 
183   /* Read the profile and return a function_instance with head count as
184      HEAD_COUNT. Recursively read callsites to create nested function_instances
185      too. STACK is used to track the recursive creation process.  */
186   static function_instance *
187   read_function_instance (function_instance_stack *stack,
188                           gcov_type head_count);
189 
190   /* Recursively deallocate all callsites (nested function_instances).  */
191   ~function_instance ();
192 
193   /* Accessors.  */
194   int
name()195   name () const
196   {
197     return name_;
198   }
199   gcov_type
total_count()200   total_count () const
201   {
202     return total_count_;
203   }
204   gcov_type
head_count()205   head_count () const
206   {
207     return head_count_;
208   }
209 
210   /* Traverse callsites of the current function_instance to find one at the
211      location of LINENO and callee name represented in DECL.  */
212   function_instance *get_function_instance_by_decl (unsigned lineno,
213                                                     tree decl) const;
214 
215   /* Store the profile info for LOC in INFO. Return TRUE if profile info
216      is found.  */
217   bool get_count_info (location_t loc, count_info *info) const;
218 
219   /* Read the inlined indirect call target profile for STMT and store it in
220      MAP, return the total count for all inlined indirect calls.  */
221   gcov_type find_icall_target_map (gcall *stmt, icall_target_map *map) const;
222 
223   /* Sum of counts that is used during annotation.  */
224   gcov_type total_annotated_count () const;
225 
226   /* Mark LOC as annotated.  */
227   void mark_annotated (location_t loc);
228 
229 private:
230   /* Callsite, represented as (decl_lineno, callee_function_name_index).  */
231   typedef std::pair<unsigned, unsigned> callsite;
232 
233   /* Map from callsite to callee function_instance.  */
234   typedef std::map<callsite, function_instance *> callsite_map;
235 
function_instance(unsigned name,gcov_type head_count)236   function_instance (unsigned name, gcov_type head_count)
237       : name_ (name), total_count_ (0), head_count_ (head_count)
238   {
239   }
240 
241   /* Map from source location (decl_lineno) to profile (count_info).  */
242   typedef std::map<unsigned, count_info> position_count_map;
243 
244   /* function_instance name index in the string_table.  */
245   unsigned name_;
246 
247   /* Total sample count.  */
248   gcov_type total_count_;
249 
250   /* Entry BB's sample count.  */
251   gcov_type head_count_;
252 
253   /* Map from callsite location to callee function_instance.  */
254   callsite_map callsites;
255 
256   /* Map from source location to count_info.  */
257   position_count_map pos_counts;
258 };
259 
260 /* Profile for all functions.  */
261 class autofdo_source_profile
262 {
263 public:
264   static autofdo_source_profile *
create()265   create ()
266   {
267     autofdo_source_profile *map = new autofdo_source_profile ();
268 
269     if (map->read ())
270       return map;
271     delete map;
272     return NULL;
273   }
274 
275   ~autofdo_source_profile ();
276 
277   /* For a given DECL, returns the top-level function_instance.  */
278   function_instance *get_function_instance_by_decl (tree decl) const;
279 
280   /* Find count_info for a given gimple STMT. If found, store the count_info
281      in INFO and return true; otherwise return false.  */
282   bool get_count_info (gimple *stmt, count_info *info) const;
283 
284   /* Find total count of the callee of EDGE.  */
285   gcov_type get_callsite_total_count (struct cgraph_edge *edge) const;
286 
287   /* Update value profile INFO for STMT from the inlined indirect callsite.
288      Return true if INFO is updated.  */
289   bool update_inlined_ind_target (gcall *stmt, count_info *info);
290 
291   /* Mark LOC as annotated.  */
292   void mark_annotated (location_t loc);
293 
294 private:
295   /* Map from function_instance name index (in string_table) to
296      function_instance.  */
297   typedef std::map<unsigned, function_instance *> name_function_instance_map;
298 
autofdo_source_profile()299   autofdo_source_profile () {}
300 
301   /* Read AutoFDO profile and returns TRUE on success.  */
302   bool read ();
303 
304   /* Return the function_instance in the profile that correspond to the
305      inline STACK.  */
306   function_instance *
307   get_function_instance_by_inline_stack (const inline_stack &stack) const;
308 
309   name_function_instance_map map_;
310 };
311 
312 /* Store the strings read from the profile data file.  */
313 static string_table *afdo_string_table;
314 
315 /* Store the AutoFDO source profile.  */
316 static autofdo_source_profile *afdo_source_profile;
317 
318 /* gcov_ctr_summary structure to store the profile_info.  */
319 static struct gcov_ctr_summary *afdo_profile_info;
320 
321 /* Helper functions.  */
322 
323 /* Return the original name of NAME: strip the suffix that starts
324    with '.' Caller is responsible for freeing RET.  */
325 
326 static char *
get_original_name(const char * name)327 get_original_name (const char *name)
328 {
329   char *ret = xstrdup (name);
330   char *find = strchr (ret, '.');
331   if (find != NULL)
332     *find = 0;
333   return ret;
334 }
335 
336 /* Return the combined location, which is a 32bit integer in which
337    higher 16 bits stores the line offset of LOC to the start lineno
338    of DECL, The lower 16 bits stores the discriminator.  */
339 
340 static unsigned
get_combined_location(location_t loc,tree decl)341 get_combined_location (location_t loc, tree decl)
342 {
343   /* TODO: allow more bits for line and less bits for discriminator.  */
344   if (LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl) >= (1<<16))
345     warning_at (loc, OPT_Woverflow, "Offset exceeds 16 bytes.");
346   return ((LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl)) << 16);
347 }
348 
349 /* Return the function decl of a given lexical BLOCK.  */
350 
351 static tree
get_function_decl_from_block(tree block)352 get_function_decl_from_block (tree block)
353 {
354   tree decl;
355 
356   if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) == UNKNOWN_LOCATION)
357     return NULL_TREE;
358 
359   for (decl = BLOCK_ABSTRACT_ORIGIN (block);
360        decl && (TREE_CODE (decl) == BLOCK);
361        decl = BLOCK_ABSTRACT_ORIGIN (decl))
362     if (TREE_CODE (decl) == FUNCTION_DECL)
363       break;
364   return decl;
365 }
366 
367 /* Store inline stack for STMT in STACK.  */
368 
369 static void
get_inline_stack(location_t locus,inline_stack * stack)370 get_inline_stack (location_t locus, inline_stack *stack)
371 {
372   if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
373     return;
374 
375   tree block = LOCATION_BLOCK (locus);
376   if (block && TREE_CODE (block) == BLOCK)
377     {
378       int level = 0;
379       for (block = BLOCK_SUPERCONTEXT (block);
380            block && (TREE_CODE (block) == BLOCK);
381            block = BLOCK_SUPERCONTEXT (block))
382         {
383           location_t tmp_locus = BLOCK_SOURCE_LOCATION (block);
384           if (LOCATION_LOCUS (tmp_locus) == UNKNOWN_LOCATION)
385             continue;
386 
387           tree decl = get_function_decl_from_block (block);
388           stack->safe_push (
389               std::make_pair (decl, get_combined_location (locus, decl)));
390           locus = tmp_locus;
391           level++;
392         }
393     }
394   stack->safe_push (
395       std::make_pair (current_function_decl,
396                       get_combined_location (locus, current_function_decl)));
397 }
398 
399 /* Return STMT's combined location, which is a 32bit integer in which
400    higher 16 bits stores the line offset of LOC to the start lineno
401    of DECL, The lower 16 bits stores the discriminator.  */
402 
403 static unsigned
get_relative_location_for_stmt(gimple * stmt)404 get_relative_location_for_stmt (gimple *stmt)
405 {
406   location_t locus = gimple_location (stmt);
407   if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
408     return UNKNOWN_LOCATION;
409 
410   for (tree block = gimple_block (stmt); block && (TREE_CODE (block) == BLOCK);
411        block = BLOCK_SUPERCONTEXT (block))
412     if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) != UNKNOWN_LOCATION)
413       return get_combined_location (locus,
414                                     get_function_decl_from_block (block));
415   return get_combined_location (locus, current_function_decl);
416 }
417 
418 /* Return true if BB contains indirect call.  */
419 
420 static bool
has_indirect_call(basic_block bb)421 has_indirect_call (basic_block bb)
422 {
423   gimple_stmt_iterator gsi;
424 
425   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
426     {
427       gimple *stmt = gsi_stmt (gsi);
428       if (gimple_code (stmt) == GIMPLE_CALL && !gimple_call_internal_p (stmt)
429           && (gimple_call_fn (stmt) == NULL
430               || TREE_CODE (gimple_call_fn (stmt)) != FUNCTION_DECL))
431         return true;
432     }
433   return false;
434 }
435 
436 /* Member functions for string_table.  */
437 
438 /* Deconstructor.  */
439 
~string_table()440 string_table::~string_table ()
441 {
442   for (unsigned i = 0; i < vector_.length (); i++)
443     free (vector_[i]);
444 }
445 
446 
447 /* Return the index of a given function NAME. Return -1 if NAME is not
448    found in string table.  */
449 
450 int
get_index(const char * name)451 string_table::get_index (const char *name) const
452 {
453   if (name == NULL)
454     return -1;
455   string_index_map::const_iterator iter = map_.find (name);
456   if (iter == map_.end ())
457     return -1;
458 
459   return iter->second;
460 }
461 
462 /* Return the index of a given function DECL. Return -1 if DECL is not
463    found in string table.  */
464 
465 int
get_index_by_decl(tree decl)466 string_table::get_index_by_decl (tree decl) const
467 {
468   char *name
469       = get_original_name (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
470   int ret = get_index (name);
471   free (name);
472   if (ret != -1)
473     return ret;
474   ret = get_index (lang_hooks.dwarf_name (decl, 0));
475   if (ret != -1)
476     return ret;
477   if (DECL_ABSTRACT_ORIGIN (decl))
478     return get_index_by_decl (DECL_ABSTRACT_ORIGIN (decl));
479 
480   return -1;
481 }
482 
483 /* Return the function name of a given INDEX.  */
484 
485 const char *
get_name(int index)486 string_table::get_name (int index) const
487 {
488   gcc_assert (index > 0 && index < (int)vector_.length ());
489   return vector_[index];
490 }
491 
492 /* Read the string table. Return TRUE if reading is successful.  */
493 
494 bool
read()495 string_table::read ()
496 {
497   if (gcov_read_unsigned () != GCOV_TAG_AFDO_FILE_NAMES)
498     return false;
499   /* Skip the length of the section.  */
500   gcov_read_unsigned ();
501   /* Read in the file name table.  */
502   unsigned string_num = gcov_read_unsigned ();
503   for (unsigned i = 0; i < string_num; i++)
504     {
505       vector_.safe_push (get_original_name (gcov_read_string ()));
506       map_[vector_.last ()] = i;
507     }
508   return true;
509 }
510 
511 /* Member functions for function_instance.  */
512 
~function_instance()513 function_instance::~function_instance ()
514 {
515   for (callsite_map::iterator iter = callsites.begin ();
516        iter != callsites.end (); ++iter)
517     delete iter->second;
518 }
519 
520 /* Traverse callsites of the current function_instance to find one at the
521    location of LINENO and callee name represented in DECL.  */
522 
523 function_instance *
get_function_instance_by_decl(unsigned lineno,tree decl)524 function_instance::get_function_instance_by_decl (unsigned lineno,
525                                                   tree decl) const
526 {
527   int func_name_idx = afdo_string_table->get_index_by_decl (decl);
528   if (func_name_idx != -1)
529     {
530       callsite_map::const_iterator ret
531           = callsites.find (std::make_pair (lineno, func_name_idx));
532       if (ret != callsites.end ())
533         return ret->second;
534     }
535   func_name_idx
536       = afdo_string_table->get_index (lang_hooks.dwarf_name (decl, 0));
537   if (func_name_idx != -1)
538     {
539       callsite_map::const_iterator ret
540           = callsites.find (std::make_pair (lineno, func_name_idx));
541       if (ret != callsites.end ())
542         return ret->second;
543     }
544   if (DECL_ABSTRACT_ORIGIN (decl))
545     return get_function_instance_by_decl (lineno, DECL_ABSTRACT_ORIGIN (decl));
546 
547   return NULL;
548 }
549 
550 /* Store the profile info for LOC in INFO. Return TRUE if profile info
551    is found.  */
552 
553 bool
get_count_info(location_t loc,count_info * info)554 function_instance::get_count_info (location_t loc, count_info *info) const
555 {
556   position_count_map::const_iterator iter = pos_counts.find (loc);
557   if (iter == pos_counts.end ())
558     return false;
559   *info = iter->second;
560   return true;
561 }
562 
563 /* Mark LOC as annotated.  */
564 
565 void
mark_annotated(location_t loc)566 function_instance::mark_annotated (location_t loc)
567 {
568   position_count_map::iterator iter = pos_counts.find (loc);
569   if (iter == pos_counts.end ())
570     return;
571   iter->second.annotated = true;
572 }
573 
574 /* Read the inlined indirect call target profile for STMT and store it in
575    MAP, return the total count for all inlined indirect calls.  */
576 
577 gcov_type
find_icall_target_map(gcall * stmt,icall_target_map * map)578 function_instance::find_icall_target_map (gcall *stmt,
579                                           icall_target_map *map) const
580 {
581   gcov_type ret = 0;
582   unsigned stmt_offset = get_relative_location_for_stmt (stmt);
583 
584   for (callsite_map::const_iterator iter = callsites.begin ();
585        iter != callsites.end (); ++iter)
586     {
587       unsigned callee = iter->second->name ();
588       /* Check if callsite location match the stmt.  */
589       if (iter->first.first != stmt_offset)
590         continue;
591       struct cgraph_node *node = cgraph_node::get_for_asmname (
592           get_identifier (afdo_string_table->get_name (callee)));
593       if (node == NULL)
594         continue;
595       if (!check_ic_target (stmt, node))
596         continue;
597       (*map)[callee] = iter->second->total_count ();
598       ret += iter->second->total_count ();
599     }
600   return ret;
601 }
602 
603 /* Read the profile and create a function_instance with head count as
604    HEAD_COUNT. Recursively read callsites to create nested function_instances
605    too. STACK is used to track the recursive creation process.  */
606 
607 /* function instance profile format:
608 
609    ENTRY_COUNT: 8 bytes
610    NAME_INDEX: 4 bytes
611    NUM_POS_COUNTS: 4 bytes
612    NUM_CALLSITES: 4 byte
613    POS_COUNT_1:
614      POS_1_OFFSET: 4 bytes
615      NUM_TARGETS: 4 bytes
616      COUNT: 8 bytes
617      TARGET_1:
618        VALUE_PROFILE_TYPE: 4 bytes
619        TARGET_IDX: 8 bytes
620        COUNT: 8 bytes
621      TARGET_2
622      ...
623      TARGET_n
624    POS_COUNT_2
625    ...
626    POS_COUNT_N
627    CALLSITE_1:
628      CALLSITE_1_OFFSET: 4 bytes
629      FUNCTION_INSTANCE_PROFILE (nested)
630    CALLSITE_2
631    ...
632    CALLSITE_n.  */
633 
634 function_instance *
read_function_instance(function_instance_stack * stack,gcov_type head_count)635 function_instance::read_function_instance (function_instance_stack *stack,
636                                            gcov_type head_count)
637 {
638   unsigned name = gcov_read_unsigned ();
639   unsigned num_pos_counts = gcov_read_unsigned ();
640   unsigned num_callsites = gcov_read_unsigned ();
641   function_instance *s = new function_instance (name, head_count);
642   stack->safe_push (s);
643 
644   for (unsigned i = 0; i < num_pos_counts; i++)
645     {
646       unsigned offset = gcov_read_unsigned () & 0xffff0000;
647       unsigned num_targets = gcov_read_unsigned ();
648       gcov_type count = gcov_read_counter ();
649       s->pos_counts[offset].count = count;
650       for (unsigned j = 0; j < stack->length (); j++)
651         (*stack)[j]->total_count_ += count;
652       for (unsigned j = 0; j < num_targets; j++)
653         {
654           /* Only indirect call target histogram is supported now.  */
655           gcov_read_unsigned ();
656           gcov_type target_idx = gcov_read_counter ();
657           s->pos_counts[offset].targets[target_idx] = gcov_read_counter ();
658         }
659     }
660   for (unsigned i = 0; i < num_callsites; i++)
661     {
662       unsigned offset = gcov_read_unsigned ();
663       function_instance *callee_function_instance
664           = read_function_instance (stack, 0);
665       s->callsites[std::make_pair (offset, callee_function_instance->name ())]
666           = callee_function_instance;
667     }
668   stack->pop ();
669   return s;
670 }
671 
672 /* Sum of counts that is used during annotation.  */
673 
674 gcov_type
total_annotated_count()675 function_instance::total_annotated_count () const
676 {
677   gcov_type ret = 0;
678   for (callsite_map::const_iterator iter = callsites.begin ();
679        iter != callsites.end (); ++iter)
680     ret += iter->second->total_annotated_count ();
681   for (position_count_map::const_iterator iter = pos_counts.begin ();
682        iter != pos_counts.end (); ++iter)
683     if (iter->second.annotated)
684       ret += iter->second.count;
685   return ret;
686 }
687 
688 /* Member functions for autofdo_source_profile.  */
689 
~autofdo_source_profile()690 autofdo_source_profile::~autofdo_source_profile ()
691 {
692   for (name_function_instance_map::const_iterator iter = map_.begin ();
693        iter != map_.end (); ++iter)
694     delete iter->second;
695 }
696 
697 /* For a given DECL, returns the top-level function_instance.  */
698 
699 function_instance *
get_function_instance_by_decl(tree decl)700 autofdo_source_profile::get_function_instance_by_decl (tree decl) const
701 {
702   int index = afdo_string_table->get_index_by_decl (decl);
703   if (index == -1)
704     return NULL;
705   name_function_instance_map::const_iterator ret = map_.find (index);
706   return ret == map_.end () ? NULL : ret->second;
707 }
708 
709 /* Find count_info for a given gimple STMT. If found, store the count_info
710    in INFO and return true; otherwise return false.  */
711 
712 bool
get_count_info(gimple * stmt,count_info * info)713 autofdo_source_profile::get_count_info (gimple *stmt, count_info *info) const
714 {
715   if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
716     return false;
717 
718   inline_stack stack;
719   get_inline_stack (gimple_location (stmt), &stack);
720   if (stack.length () == 0)
721     return false;
722   function_instance *s = get_function_instance_by_inline_stack (stack);
723   if (s == NULL)
724     return false;
725   return s->get_count_info (stack[0].second, info);
726 }
727 
728 /* Mark LOC as annotated.  */
729 
730 void
mark_annotated(location_t loc)731 autofdo_source_profile::mark_annotated (location_t loc)
732 {
733   inline_stack stack;
734   get_inline_stack (loc, &stack);
735   if (stack.length () == 0)
736     return;
737   function_instance *s = get_function_instance_by_inline_stack (stack);
738   if (s == NULL)
739     return;
740   s->mark_annotated (stack[0].second);
741 }
742 
743 /* Update value profile INFO for STMT from the inlined indirect callsite.
744    Return true if INFO is updated.  */
745 
746 bool
update_inlined_ind_target(gcall * stmt,count_info * info)747 autofdo_source_profile::update_inlined_ind_target (gcall *stmt,
748                                                    count_info *info)
749 {
750   if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
751     return false;
752 
753   count_info old_info;
754   get_count_info (stmt, &old_info);
755   gcov_type total = 0;
756   for (icall_target_map::const_iterator iter = old_info.targets.begin ();
757        iter != old_info.targets.end (); ++iter)
758     total += iter->second;
759 
760   /* Program behavior changed, original promoted (and inlined) target is not
761      hot any more. Will avoid promote the original target.
762 
763      To check if original promoted target is still hot, we check the total
764      count of the unpromoted targets (stored in old_info). If it is no less
765      than half of the callsite count (stored in INFO), the original promoted
766      target is considered not hot any more.  */
767   if (total >= info->count / 2)
768     return false;
769 
770   inline_stack stack;
771   get_inline_stack (gimple_location (stmt), &stack);
772   if (stack.length () == 0)
773     return false;
774   function_instance *s = get_function_instance_by_inline_stack (stack);
775   if (s == NULL)
776     return false;
777   icall_target_map map;
778   if (s->find_icall_target_map (stmt, &map) == 0)
779     return false;
780   for (icall_target_map::const_iterator iter = map.begin ();
781        iter != map.end (); ++iter)
782     info->targets[iter->first] = iter->second;
783   return true;
784 }
785 
786 /* Find total count of the callee of EDGE.  */
787 
788 gcov_type
get_callsite_total_count(struct cgraph_edge * edge)789 autofdo_source_profile::get_callsite_total_count (
790     struct cgraph_edge *edge) const
791 {
792   inline_stack stack;
793   stack.safe_push (std::make_pair (edge->callee->decl, 0));
794   get_inline_stack (gimple_location (edge->call_stmt), &stack);
795 
796   function_instance *s = get_function_instance_by_inline_stack (stack);
797   if (s == NULL
798       || afdo_string_table->get_index (IDENTIFIER_POINTER (
799              DECL_ASSEMBLER_NAME (edge->callee->decl))) != s->name ())
800     return 0;
801 
802   return s->total_count ();
803 }
804 
805 /* Read AutoFDO profile and returns TRUE on success.  */
806 
807 /* source profile format:
808 
809    GCOV_TAG_AFDO_FUNCTION: 4 bytes
810    LENGTH: 4 bytes
811    NUM_FUNCTIONS: 4 bytes
812    FUNCTION_INSTANCE_1
813    FUNCTION_INSTANCE_2
814    ...
815    FUNCTION_INSTANCE_N.  */
816 
817 bool
read()818 autofdo_source_profile::read ()
819 {
820   if (gcov_read_unsigned () != GCOV_TAG_AFDO_FUNCTION)
821     {
822       inform (0, "Not expected TAG.");
823       return false;
824     }
825 
826   /* Skip the length of the section.  */
827   gcov_read_unsigned ();
828 
829   /* Read in the function/callsite profile, and store it in local
830      data structure.  */
831   unsigned function_num = gcov_read_unsigned ();
832   for (unsigned i = 0; i < function_num; i++)
833     {
834       function_instance::function_instance_stack stack;
835       function_instance *s = function_instance::read_function_instance (
836           &stack, gcov_read_counter ());
837       afdo_profile_info->sum_all += s->total_count ();
838       map_[s->name ()] = s;
839     }
840   return true;
841 }
842 
843 /* Return the function_instance in the profile that correspond to the
844    inline STACK.  */
845 
846 function_instance *
get_function_instance_by_inline_stack(const inline_stack & stack)847 autofdo_source_profile::get_function_instance_by_inline_stack (
848     const inline_stack &stack) const
849 {
850   name_function_instance_map::const_iterator iter = map_.find (
851       afdo_string_table->get_index_by_decl (stack[stack.length () - 1].first));
852   if (iter == map_.end())
853     return NULL;
854   function_instance *s = iter->second;
855   for (unsigned i = stack.length() - 1; i > 0; i--)
856     {
857       s = s->get_function_instance_by_decl (
858           stack[i].second, stack[i - 1].first);
859       if (s == NULL)
860         return NULL;
861     }
862   return s;
863 }
864 
865 /* Module profile is only used by LIPO. Here we simply ignore it.  */
866 
867 static void
fake_read_autofdo_module_profile()868 fake_read_autofdo_module_profile ()
869 {
870   /* Read in the module info.  */
871   gcov_read_unsigned ();
872 
873   /* Skip the length of the section.  */
874   gcov_read_unsigned ();
875 
876   /* Read in the file name table.  */
877   unsigned total_module_num = gcov_read_unsigned ();
878   gcc_assert (total_module_num == 0);
879 }
880 
881 /* Read data from profile data file.  */
882 
883 static void
read_profile(void)884 read_profile (void)
885 {
886   if (gcov_open (auto_profile_file, 1) == 0)
887     error ("Cannot open profile file %s.", auto_profile_file);
888 
889   if (gcov_read_unsigned () != GCOV_DATA_MAGIC)
890     error ("AutoFDO profile magic number does not mathch.");
891 
892   /* Skip the version number.  */
893   unsigned version = gcov_read_unsigned ();
894   if (version != AUTO_PROFILE_VERSION)
895     error ("AutoFDO profile version %u does match %u.",
896            version, AUTO_PROFILE_VERSION);
897 
898   /* Skip the empty integer.  */
899   gcov_read_unsigned ();
900 
901   /* string_table.  */
902   afdo_string_table = new string_table ();
903   if (!afdo_string_table->read())
904     error ("Cannot read string table from %s.", auto_profile_file);
905 
906   /* autofdo_source_profile.  */
907   afdo_source_profile = autofdo_source_profile::create ();
908   if (afdo_source_profile == NULL)
909     error ("Cannot read function profile from %s.", auto_profile_file);
910 
911   /* autofdo_module_profile.  */
912   fake_read_autofdo_module_profile ();
913 
914   /* Read in the working set.  */
915   if (gcov_read_unsigned () != GCOV_TAG_AFDO_WORKING_SET)
916     error ("Cannot read working set from %s.", auto_profile_file);
917 
918   /* Skip the length of the section.  */
919   gcov_read_unsigned ();
920   gcov_working_set_t set[128];
921   for (unsigned i = 0; i < 128; i++)
922     {
923       set[i].num_counters = gcov_read_unsigned ();
924       set[i].min_counter = gcov_read_counter ();
925     }
926   add_working_set (set);
927 }
928 
929 /* From AutoFDO profiles, find values inside STMT for that we want to measure
930    histograms for indirect-call optimization.
931 
932    This function is actually served for 2 purposes:
933      * before annotation, we need to mark histogram, promote and inline
934      * after annotation, we just need to mark, and let follow-up logic to
935        decide if it needs to promote and inline.  */
936 
937 static void
afdo_indirect_call(gimple_stmt_iterator * gsi,const icall_target_map & map,bool transform)938 afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map,
939                     bool transform)
940 {
941   gimple *gs = gsi_stmt (*gsi);
942   tree callee;
943 
944   if (map.size () == 0)
945     return;
946   gcall *stmt = dyn_cast <gcall *> (gs);
947   if ((!stmt) || gimple_call_fndecl (stmt) != NULL_TREE)
948     return;
949 
950   callee = gimple_call_fn (stmt);
951 
952   histogram_value hist = gimple_alloc_histogram_value (
953       cfun, HIST_TYPE_INDIR_CALL, stmt, callee);
954   hist->n_counters = 3;
955   hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters);
956   gimple_add_histogram_value (cfun, stmt, hist);
957 
958   gcov_type total = 0;
959   icall_target_map::const_iterator max_iter = map.end ();
960 
961   for (icall_target_map::const_iterator iter = map.begin ();
962        iter != map.end (); ++iter)
963     {
964       total += iter->second;
965       if (max_iter == map.end () || max_iter->second < iter->second)
966         max_iter = iter;
967     }
968 
969   hist->hvalue.counters[0]
970       = (unsigned long long)afdo_string_table->get_name (max_iter->first);
971   hist->hvalue.counters[1] = max_iter->second;
972   hist->hvalue.counters[2] = total;
973 
974   if (!transform)
975     return;
976 
977   struct cgraph_edge *indirect_edge
978       = cgraph_node::get (current_function_decl)->get_edge (stmt);
979   struct cgraph_node *direct_call = cgraph_node::get_for_asmname (
980       get_identifier ((const char *) hist->hvalue.counters[0]));
981 
982   if (direct_call == NULL || !check_ic_target (stmt, direct_call))
983     return;
984   if (DECL_STRUCT_FUNCTION (direct_call->decl) == NULL)
985     return;
986   struct cgraph_edge *new_edge
987       = indirect_edge->make_speculative (direct_call, 0, 0);
988   new_edge->redirect_call_stmt_to_callee ();
989   gimple_remove_histogram_value (cfun, stmt, hist);
990   inline_call (new_edge, true, NULL, NULL, false);
991 }
992 
993 /* From AutoFDO profiles, find values inside STMT for that we want to measure
994    histograms and adds them to list VALUES.  */
995 
996 static void
afdo_vpt(gimple_stmt_iterator * gsi,const icall_target_map & map,bool transform)997 afdo_vpt (gimple_stmt_iterator *gsi, const icall_target_map &map,
998           bool transform)
999 {
1000   afdo_indirect_call (gsi, map, transform);
1001 }
1002 
1003 typedef std::set<basic_block> bb_set;
1004 typedef std::set<edge> edge_set;
1005 
1006 static bool
is_bb_annotated(const basic_block bb,const bb_set & annotated)1007 is_bb_annotated (const basic_block bb, const bb_set &annotated)
1008 {
1009   return annotated.find (bb) != annotated.end ();
1010 }
1011 
1012 static void
set_bb_annotated(basic_block bb,bb_set * annotated)1013 set_bb_annotated (basic_block bb, bb_set *annotated)
1014 {
1015   annotated->insert (bb);
1016 }
1017 
1018 static bool
is_edge_annotated(const edge e,const edge_set & annotated)1019 is_edge_annotated (const edge e, const edge_set &annotated)
1020 {
1021   return annotated.find (e) != annotated.end ();
1022 }
1023 
1024 static void
set_edge_annotated(edge e,edge_set * annotated)1025 set_edge_annotated (edge e, edge_set *annotated)
1026 {
1027   annotated->insert (e);
1028 }
1029 
1030 /* For a given BB, set its execution count. Attach value profile if a stmt
1031    is not in PROMOTED, because we only want to promote an indirect call once.
1032    Return TRUE if BB is annotated.  */
1033 
1034 static bool
afdo_set_bb_count(basic_block bb,const stmt_set & promoted)1035 afdo_set_bb_count (basic_block bb, const stmt_set &promoted)
1036 {
1037   gimple_stmt_iterator gsi;
1038   edge e;
1039   edge_iterator ei;
1040   gcov_type max_count = 0;
1041   bool has_annotated = false;
1042 
1043   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1044     {
1045       count_info info;
1046       gimple *stmt = gsi_stmt (gsi);
1047       if (gimple_clobber_p (stmt) || is_gimple_debug (stmt))
1048         continue;
1049       if (afdo_source_profile->get_count_info (stmt, &info))
1050         {
1051           if (info.count > max_count)
1052             max_count = info.count;
1053           has_annotated = true;
1054           if (info.targets.size () > 0
1055               && promoted.find (stmt) == promoted.end ())
1056             afdo_vpt (&gsi, info.targets, false);
1057         }
1058     }
1059 
1060   if (!has_annotated)
1061     return false;
1062 
1063   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1064     afdo_source_profile->mark_annotated (gimple_location (gsi_stmt (gsi)));
1065   for (gphi_iterator gpi = gsi_start_phis (bb);
1066        !gsi_end_p (gpi);
1067        gsi_next (&gpi))
1068     {
1069       gphi *phi = gpi.phi ();
1070       size_t i;
1071       for (i = 0; i < gimple_phi_num_args (phi); i++)
1072         afdo_source_profile->mark_annotated (gimple_phi_arg_location (phi, i));
1073     }
1074   FOR_EACH_EDGE (e, ei, bb->succs)
1075   afdo_source_profile->mark_annotated (e->goto_locus);
1076 
1077   bb->count = max_count;
1078   return true;
1079 }
1080 
1081 /* BB1 and BB2 are in an equivalent class iff:
1082    1. BB1 dominates BB2.
1083    2. BB2 post-dominates BB1.
1084    3. BB1 and BB2 are in the same loop nest.
1085    This function finds the equivalent class for each basic block, and
1086    stores a pointer to the first BB in its equivalent class. Meanwhile,
1087    set bb counts for the same equivalent class to be idenical. Update
1088    ANNOTATED_BB for the first BB in its equivalent class.  */
1089 
1090 static void
afdo_find_equiv_class(bb_set * annotated_bb)1091 afdo_find_equiv_class (bb_set *annotated_bb)
1092 {
1093   basic_block bb;
1094 
1095   FOR_ALL_BB_FN (bb, cfun)
1096   bb->aux = NULL;
1097 
1098   FOR_ALL_BB_FN (bb, cfun)
1099   {
1100     vec<basic_block> dom_bbs;
1101     basic_block bb1;
1102     int i;
1103 
1104     if (bb->aux != NULL)
1105       continue;
1106     bb->aux = bb;
1107     dom_bbs = get_dominated_by (CDI_DOMINATORS, bb);
1108     FOR_EACH_VEC_ELT (dom_bbs, i, bb1)
1109       if (bb1->aux == NULL && dominated_by_p (CDI_POST_DOMINATORS, bb, bb1)
1110 	  && bb1->loop_father == bb->loop_father)
1111 	{
1112 	  bb1->aux = bb;
1113 	  if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
1114 	    {
1115 	      bb->count = bb1->count;
1116 	      set_bb_annotated (bb, annotated_bb);
1117 	    }
1118 	}
1119     dom_bbs = get_dominated_by (CDI_POST_DOMINATORS, bb);
1120     FOR_EACH_VEC_ELT (dom_bbs, i, bb1)
1121       if (bb1->aux == NULL && dominated_by_p (CDI_DOMINATORS, bb, bb1)
1122 	  && bb1->loop_father == bb->loop_father)
1123 	{
1124 	  bb1->aux = bb;
1125 	  if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
1126 	    {
1127 	      bb->count = bb1->count;
1128 	      set_bb_annotated (bb, annotated_bb);
1129 	    }
1130 	}
1131   }
1132 }
1133 
1134 /* If a basic block's count is known, and only one of its in/out edges' count
1135    is unknown, its count can be calculated. Meanwhile, if all of the in/out
1136    edges' counts are known, then the basic block's unknown count can also be
1137    calculated.
1138    IS_SUCC is true if out edges of a basic blocks are examined.
1139    Update ANNOTATED_BB and ANNOTATED_EDGE accordingly.
1140    Return TRUE if any basic block/edge count is changed.  */
1141 
1142 static bool
afdo_propagate_edge(bool is_succ,bb_set * annotated_bb,edge_set * annotated_edge)1143 afdo_propagate_edge (bool is_succ, bb_set *annotated_bb,
1144                      edge_set *annotated_edge)
1145 {
1146   basic_block bb;
1147   bool changed = false;
1148 
1149   FOR_EACH_BB_FN (bb, cfun)
1150   {
1151     edge e, unknown_edge = NULL;
1152     edge_iterator ei;
1153     int num_unknown_edge = 0;
1154     gcov_type total_known_count = 0;
1155 
1156     FOR_EACH_EDGE (e, ei, is_succ ? bb->succs : bb->preds)
1157       if (!is_edge_annotated (e, *annotated_edge))
1158 	num_unknown_edge++, unknown_edge = e;
1159       else
1160 	total_known_count += e->count;
1161 
1162     if (num_unknown_edge == 0)
1163       {
1164         if (total_known_count > bb->count)
1165           {
1166             bb->count = total_known_count;
1167             changed = true;
1168           }
1169         if (!is_bb_annotated (bb, *annotated_bb))
1170           {
1171             set_bb_annotated (bb, annotated_bb);
1172             changed = true;
1173           }
1174       }
1175     else if (num_unknown_edge == 1 && is_bb_annotated (bb, *annotated_bb))
1176       {
1177         if (bb->count >= total_known_count)
1178           unknown_edge->count = bb->count - total_known_count;
1179         else
1180           unknown_edge->count = 0;
1181         set_edge_annotated (unknown_edge, annotated_edge);
1182         changed = true;
1183       }
1184   }
1185   return changed;
1186 }
1187 
1188 /* Special propagation for circuit expressions. Because GCC translates
1189    control flow into data flow for circuit expressions. E.g.
1190    BB1:
1191    if (a && b)
1192      BB2
1193    else
1194      BB3
1195 
1196    will be translated into:
1197 
1198    BB1:
1199      if (a)
1200        goto BB.t1
1201      else
1202        goto BB.t3
1203    BB.t1:
1204      if (b)
1205        goto BB.t2
1206      else
1207        goto BB.t3
1208    BB.t2:
1209      goto BB.t3
1210    BB.t3:
1211      tmp = PHI (0 (BB1), 0 (BB.t1), 1 (BB.t2)
1212      if (tmp)
1213        goto BB2
1214      else
1215        goto BB3
1216 
1217    In this case, we need to propagate through PHI to determine the edge
1218    count of BB1->BB.t1, BB.t1->BB.t2.
1219    Update ANNOTATED_EDGE accordingly.  */
1220 
1221 static void
afdo_propagate_circuit(const bb_set & annotated_bb,edge_set * annotated_edge)1222 afdo_propagate_circuit (const bb_set &annotated_bb, edge_set *annotated_edge)
1223 {
1224   basic_block bb;
1225   FOR_ALL_BB_FN (bb, cfun)
1226   {
1227     gimple *def_stmt;
1228     tree cmp_rhs, cmp_lhs;
1229     gimple *cmp_stmt = last_stmt (bb);
1230     edge e;
1231     edge_iterator ei;
1232 
1233     if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
1234       continue;
1235     cmp_rhs = gimple_cond_rhs (cmp_stmt);
1236     cmp_lhs = gimple_cond_lhs (cmp_stmt);
1237     if (!TREE_CONSTANT (cmp_rhs)
1238         || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1239       continue;
1240     if (TREE_CODE (cmp_lhs) != SSA_NAME)
1241       continue;
1242     if (!is_bb_annotated (bb, annotated_bb))
1243       continue;
1244     def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1245     while (def_stmt && gimple_code (def_stmt) == GIMPLE_ASSIGN
1246            && gimple_assign_single_p (def_stmt)
1247            && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
1248       def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt));
1249     if (!def_stmt)
1250       continue;
1251     gphi *phi_stmt = dyn_cast <gphi *> (def_stmt);
1252     if (!phi_stmt)
1253       continue;
1254     FOR_EACH_EDGE (e, ei, bb->succs)
1255     {
1256       unsigned i, total = 0;
1257       edge only_one;
1258       bool check_value_one = (((integer_onep (cmp_rhs))
1259                                ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1260                               ^ ((e->flags & EDGE_TRUE_VALUE) != 0));
1261       if (!is_edge_annotated (e, *annotated_edge))
1262         continue;
1263       for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1264         {
1265           tree val = gimple_phi_arg_def (phi_stmt, i);
1266           edge ep = gimple_phi_arg_edge (phi_stmt, i);
1267 
1268           if (!TREE_CONSTANT (val)
1269               || !(integer_zerop (val) || integer_onep (val)))
1270             continue;
1271           if (check_value_one ^ integer_onep (val))
1272             continue;
1273           total++;
1274           only_one = ep;
1275           if (e->probability == 0 && !is_edge_annotated (ep, *annotated_edge))
1276             {
1277               ep->probability = 0;
1278               ep->count = 0;
1279               set_edge_annotated (ep, annotated_edge);
1280             }
1281         }
1282       if (total == 1 && !is_edge_annotated (only_one, *annotated_edge))
1283         {
1284           only_one->probability = e->probability;
1285           only_one->count = e->count;
1286           set_edge_annotated (only_one, annotated_edge);
1287         }
1288     }
1289   }
1290 }
1291 
1292 /* Propagate the basic block count and edge count on the control flow
1293    graph. We do the propagation iteratively until stablize.  */
1294 
1295 static void
afdo_propagate(bb_set * annotated_bb,edge_set * annotated_edge)1296 afdo_propagate (bb_set *annotated_bb, edge_set *annotated_edge)
1297 {
1298   basic_block bb;
1299   bool changed = true;
1300   int i = 0;
1301 
1302   FOR_ALL_BB_FN (bb, cfun)
1303   {
1304     bb->count = ((basic_block)bb->aux)->count;
1305     if (is_bb_annotated ((const basic_block)bb->aux, *annotated_bb))
1306       set_bb_annotated (bb, annotated_bb);
1307   }
1308 
1309   while (changed && i++ < 10)
1310     {
1311       changed = false;
1312 
1313       if (afdo_propagate_edge (true, annotated_bb, annotated_edge))
1314         changed = true;
1315       if (afdo_propagate_edge (false, annotated_bb, annotated_edge))
1316         changed = true;
1317       afdo_propagate_circuit (*annotated_bb, annotated_edge);
1318     }
1319 }
1320 
1321 /* Propagate counts on control flow graph and calculate branch
1322    probabilities.  */
1323 
1324 static void
afdo_calculate_branch_prob(bb_set * annotated_bb,edge_set * annotated_edge)1325 afdo_calculate_branch_prob (bb_set *annotated_bb, edge_set *annotated_edge)
1326 {
1327   basic_block bb;
1328   bool has_sample = false;
1329 
1330   FOR_EACH_BB_FN (bb, cfun)
1331   {
1332     if (bb->count > 0)
1333       {
1334 	has_sample = true;
1335 	break;
1336       }
1337   }
1338 
1339   if (!has_sample)
1340     return;
1341 
1342   calculate_dominance_info (CDI_POST_DOMINATORS);
1343   calculate_dominance_info (CDI_DOMINATORS);
1344   loop_optimizer_init (0);
1345 
1346   afdo_find_equiv_class (annotated_bb);
1347   afdo_propagate (annotated_bb, annotated_edge);
1348 
1349   FOR_EACH_BB_FN (bb, cfun)
1350   {
1351     edge e;
1352     edge_iterator ei;
1353     int num_unknown_succ = 0;
1354     gcov_type total_count = 0;
1355 
1356     FOR_EACH_EDGE (e, ei, bb->succs)
1357     {
1358       if (!is_edge_annotated (e, *annotated_edge))
1359         num_unknown_succ++;
1360       else
1361         total_count += e->count;
1362     }
1363     if (num_unknown_succ == 0 && total_count > 0)
1364       {
1365         FOR_EACH_EDGE (e, ei, bb->succs)
1366         e->probability = (double)e->count * REG_BR_PROB_BASE / total_count;
1367       }
1368   }
1369   FOR_ALL_BB_FN (bb, cfun)
1370   {
1371     edge e;
1372     edge_iterator ei;
1373 
1374     FOR_EACH_EDGE (e, ei, bb->succs)
1375       e->count = (double)bb->count * e->probability / REG_BR_PROB_BASE;
1376     bb->aux = NULL;
1377   }
1378 
1379   loop_optimizer_finalize ();
1380   free_dominance_info (CDI_DOMINATORS);
1381   free_dominance_info (CDI_POST_DOMINATORS);
1382 }
1383 
1384 /* Perform value profile transformation using AutoFDO profile. Add the
1385    promoted stmts to PROMOTED_STMTS. Return TRUE if there is any
1386    indirect call promoted.  */
1387 
1388 static bool
afdo_vpt_for_early_inline(stmt_set * promoted_stmts)1389 afdo_vpt_for_early_inline (stmt_set *promoted_stmts)
1390 {
1391   basic_block bb;
1392   if (afdo_source_profile->get_function_instance_by_decl (
1393           current_function_decl) == NULL)
1394     return false;
1395 
1396   compute_inline_parameters (cgraph_node::get (current_function_decl), true);
1397 
1398   bool has_vpt = false;
1399   FOR_EACH_BB_FN (bb, cfun)
1400   {
1401     if (!has_indirect_call (bb))
1402       continue;
1403     gimple_stmt_iterator gsi;
1404 
1405     gcov_type bb_count = 0;
1406     for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1407       {
1408         count_info info;
1409 	gimple *stmt = gsi_stmt (gsi);
1410         if (afdo_source_profile->get_count_info (stmt, &info))
1411           bb_count = MAX (bb_count, info.count);
1412       }
1413 
1414     for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1415       {
1416         gcall *stmt = dyn_cast <gcall *> (gsi_stmt (gsi));
1417         /* IC_promotion and early_inline_2 is done in multiple iterations.
1418            No need to promoted the stmt if its in promoted_stmts (means
1419            it is already been promoted in the previous iterations).  */
1420         if ((!stmt) || gimple_call_fn (stmt) == NULL
1421             || TREE_CODE (gimple_call_fn (stmt)) == FUNCTION_DECL
1422             || promoted_stmts->find (stmt) != promoted_stmts->end ())
1423           continue;
1424 
1425         count_info info;
1426         afdo_source_profile->get_count_info (stmt, &info);
1427         info.count = bb_count;
1428         if (afdo_source_profile->update_inlined_ind_target (stmt, &info))
1429           {
1430             /* Promote the indirect call and update the promoted_stmts.  */
1431             promoted_stmts->insert (stmt);
1432             afdo_vpt (&gsi, info.targets, true);
1433             has_vpt = true;
1434           }
1435       }
1436   }
1437 
1438   if (has_vpt)
1439     {
1440       unsigned todo = optimize_inline_calls (current_function_decl);
1441       if (todo & TODO_update_ssa_any)
1442        update_ssa (TODO_update_ssa);
1443       return true;
1444     }
1445 
1446   return false;
1447 }
1448 
1449 /* Annotate auto profile to the control flow graph. Do not annotate value
1450    profile for stmts in PROMOTED_STMTS.  */
1451 
1452 static void
afdo_annotate_cfg(const stmt_set & promoted_stmts)1453 afdo_annotate_cfg (const stmt_set &promoted_stmts)
1454 {
1455   basic_block bb;
1456   bb_set annotated_bb;
1457   edge_set annotated_edge;
1458   const function_instance *s
1459       = afdo_source_profile->get_function_instance_by_decl (
1460           current_function_decl);
1461 
1462   if (s == NULL)
1463     return;
1464   cgraph_node::get (current_function_decl)->count = s->head_count ();
1465   ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = s->head_count ();
1466   gcov_type max_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1467 
1468   FOR_EACH_BB_FN (bb, cfun)
1469   {
1470     edge e;
1471     edge_iterator ei;
1472 
1473     bb->count = 0;
1474     FOR_EACH_EDGE (e, ei, bb->succs)
1475       e->count = 0;
1476 
1477     if (afdo_set_bb_count (bb, promoted_stmts))
1478       set_bb_annotated (bb, &annotated_bb);
1479     if (bb->count > max_count)
1480       max_count = bb->count;
1481   }
1482   if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1483       > ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count)
1484     {
1485       ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count
1486           = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1487       set_bb_annotated (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb, &annotated_bb);
1488     }
1489   if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1490       > EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count)
1491     {
1492       EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count
1493           = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1494       set_bb_annotated (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb, &annotated_bb);
1495     }
1496   afdo_source_profile->mark_annotated (
1497       DECL_SOURCE_LOCATION (current_function_decl));
1498   afdo_source_profile->mark_annotated (cfun->function_start_locus);
1499   afdo_source_profile->mark_annotated (cfun->function_end_locus);
1500   if (max_count > 0)
1501     {
1502       afdo_calculate_branch_prob (&annotated_bb, &annotated_edge);
1503       counts_to_freqs ();
1504       profile_status_for_fn (cfun) = PROFILE_READ;
1505     }
1506   if (flag_value_profile_transformations)
1507     {
1508       gimple_value_profile_transformations ();
1509       free_dominance_info (CDI_DOMINATORS);
1510       free_dominance_info (CDI_POST_DOMINATORS);
1511       update_ssa (TODO_update_ssa);
1512     }
1513 }
1514 
1515 /* Wrapper function to invoke early inliner.  */
1516 
1517 static void
early_inline()1518 early_inline ()
1519 {
1520   compute_inline_parameters (cgraph_node::get (current_function_decl), true);
1521   unsigned todo = early_inliner (cfun);
1522   if (todo & TODO_update_ssa_any)
1523     update_ssa (TODO_update_ssa);
1524 }
1525 
1526 /* Use AutoFDO profile to annoate the control flow graph.
1527    Return the todo flag.  */
1528 
1529 static unsigned int
auto_profile(void)1530 auto_profile (void)
1531 {
1532   struct cgraph_node *node;
1533 
1534   if (symtab->state == FINISHED)
1535     return 0;
1536 
1537   init_node_map (true);
1538   profile_info = autofdo::afdo_profile_info;
1539 
1540   FOR_EACH_FUNCTION (node)
1541   {
1542     if (!gimple_has_body_p (node->decl))
1543       continue;
1544 
1545     /* Don't profile functions produced for builtin stuff.  */
1546     if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
1547       continue;
1548 
1549     push_cfun (DECL_STRUCT_FUNCTION (node->decl));
1550 
1551     /* First do indirect call promotion and early inline to make the
1552        IR match the profiled binary before actual annotation.
1553 
1554        This is needed because an indirect call might have been promoted
1555        and inlined in the profiled binary. If we do not promote and
1556        inline these indirect calls before annotation, the profile for
1557        these promoted functions will be lost.
1558 
1559        e.g. foo() --indirect_call--> bar()
1560        In profiled binary, the callsite is promoted and inlined, making
1561        the profile look like:
1562 
1563        foo: {
1564          loc_foo_1: count_1
1565          bar@loc_foo_2: {
1566            loc_bar_1: count_2
1567            loc_bar_2: count_3
1568          }
1569        }
1570 
1571        Before AutoFDO pass, loc_foo_2 is not promoted thus not inlined.
1572        If we perform annotation on it, the profile inside bar@loc_foo2
1573        will be wasted.
1574 
1575        To avoid this, we promote loc_foo_2 and inline the promoted bar
1576        function before annotation, so the profile inside bar@loc_foo2
1577        will be useful.  */
1578     autofdo::stmt_set promoted_stmts;
1579     for (int i = 0; i < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS); i++)
1580       {
1581         if (!flag_value_profile_transformations
1582             || !autofdo::afdo_vpt_for_early_inline (&promoted_stmts))
1583           break;
1584         early_inline ();
1585       }
1586 
1587     early_inline ();
1588     autofdo::afdo_annotate_cfg (promoted_stmts);
1589     compute_function_frequency ();
1590 
1591     /* Local pure-const may imply need to fixup the cfg.  */
1592     if (execute_fixup_cfg () & TODO_cleanup_cfg)
1593       cleanup_tree_cfg ();
1594 
1595     free_dominance_info (CDI_DOMINATORS);
1596     free_dominance_info (CDI_POST_DOMINATORS);
1597     cgraph_edge::rebuild_edges ();
1598     compute_inline_parameters (cgraph_node::get (current_function_decl), true);
1599     pop_cfun ();
1600   }
1601 
1602   return TODO_rebuild_cgraph_edges;
1603 }
1604 } /* namespace autofdo.  */
1605 
1606 /* Read the profile from the profile data file.  */
1607 
1608 void
read_autofdo_file(void)1609 read_autofdo_file (void)
1610 {
1611   if (auto_profile_file == NULL)
1612     auto_profile_file = DEFAULT_AUTO_PROFILE_FILE;
1613 
1614   autofdo::afdo_profile_info = (struct gcov_ctr_summary *)xcalloc (
1615       1, sizeof (struct gcov_ctr_summary));
1616   autofdo::afdo_profile_info->runs = 1;
1617   autofdo::afdo_profile_info->sum_max = 0;
1618   autofdo::afdo_profile_info->sum_all = 0;
1619 
1620   /* Read the profile from the profile file.  */
1621   autofdo::read_profile ();
1622 }
1623 
1624 /* Free the resources.  */
1625 
1626 void
end_auto_profile(void)1627 end_auto_profile (void)
1628 {
1629   delete autofdo::afdo_source_profile;
1630   delete autofdo::afdo_string_table;
1631   profile_info = NULL;
1632 }
1633 
1634 /* Returns TRUE if EDGE is hot enough to be inlined early.  */
1635 
1636 bool
afdo_callsite_hot_enough_for_early_inline(struct cgraph_edge * edge)1637 afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge)
1638 {
1639   gcov_type count
1640       = autofdo::afdo_source_profile->get_callsite_total_count (edge);
1641 
1642   if (count > 0)
1643     {
1644       bool is_hot;
1645       const struct gcov_ctr_summary *saved_profile_info = profile_info;
1646       /* At early inline stage, profile_info is not set yet. We need to
1647          temporarily set it to afdo_profile_info to calculate hotness.  */
1648       profile_info = autofdo::afdo_profile_info;
1649       is_hot = maybe_hot_count_p (NULL, count);
1650       profile_info = saved_profile_info;
1651       return is_hot;
1652     }
1653 
1654   return false;
1655 }
1656 
1657 namespace
1658 {
1659 
1660 const pass_data pass_data_ipa_auto_profile = {
1661   SIMPLE_IPA_PASS, "afdo", /* name */
1662   OPTGROUP_NONE,           /* optinfo_flags */
1663   TV_IPA_AUTOFDO,          /* tv_id */
1664   0,                       /* properties_required */
1665   0,                       /* properties_provided */
1666   0,                       /* properties_destroyed */
1667   0,                       /* todo_flags_start */
1668   0,                       /* todo_flags_finish */
1669 };
1670 
1671 class pass_ipa_auto_profile : public simple_ipa_opt_pass
1672 {
1673 public:
pass_ipa_auto_profile(gcc::context * ctxt)1674   pass_ipa_auto_profile (gcc::context *ctxt)
1675       : simple_ipa_opt_pass (pass_data_ipa_auto_profile, ctxt)
1676   {
1677   }
1678 
1679   /* opt_pass methods: */
1680   virtual bool
gate(function *)1681   gate (function *)
1682   {
1683     return flag_auto_profile;
1684   }
1685   virtual unsigned int
execute(function *)1686   execute (function *)
1687   {
1688     return autofdo::auto_profile ();
1689   }
1690 }; // class pass_ipa_auto_profile
1691 
1692 } // anon namespace
1693 
1694 simple_ipa_opt_pass *
make_pass_ipa_auto_profile(gcc::context * ctxt)1695 make_pass_ipa_auto_profile (gcc::context *ctxt)
1696 {
1697   return new pass_ipa_auto_profile (ctxt);
1698 }
1699