1 /* Read and annotate call graph profile from the auto profile data file.
2    Copyright (C) 2014-2019 Free Software Foundation, Inc.
3    Contributed by Dehao Chen (dehao@google.com)
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #define INCLUDE_MAP
23 #define INCLUDE_SET
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "predict.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "ssa.h"
33 #include "cgraph.h"
34 #include "gcov-io.h"
35 #include "diagnostic-core.h"
36 #include "profile.h"
37 #include "langhooks.h"
38 #include "cfgloop.h"
39 #include "tree-cfg.h"
40 #include "tree-cfgcleanup.h"
41 #include "tree-into-ssa.h"
42 #include "gimple-iterator.h"
43 #include "value-prof.h"
44 #include "params.h"
45 #include "symbol-summary.h"
46 #include "ipa-prop.h"
47 #include "ipa-fnsummary.h"
48 #include "ipa-inline.h"
49 #include "tree-inline.h"
50 #include "auto-profile.h"
51 #include "tree-pretty-print.h"
52 #include "gimple-pretty-print.h"
53 
54 /* The following routines implements AutoFDO optimization.
55 
56    This optimization uses sampling profiles to annotate basic block counts
57    and uses heuristics to estimate branch probabilities.
58 
59    There are three phases in AutoFDO:
60 
61    Phase 1: Read profile from the profile data file.
62      The following info is read from the profile datafile:
63         * string_table: a map between function name and its index.
64         * autofdo_source_profile: a map from function_instance name to
65           function_instance. This is represented as a forest of
66           function_instances.
67         * WorkingSet: a histogram of how many instructions are covered for a
68           given percentage of total cycles. This is describing the binary
69           level information (not source level). This info is used to help
70           decide if we want aggressive optimizations that could increase
71           code footprint (e.g. loop unroll etc.)
72      A function instance is an instance of function that could either be a
73      standalone symbol, or a clone of a function that is inlined into another
74      function.
75 
76    Phase 2: Early inline + value profile transformation.
77      Early inline uses autofdo_source_profile to find if a callsite is:
78         * inlined in the profiled binary.
79         * callee body is hot in the profiling run.
80      If both condition satisfies, early inline will inline the callsite
81      regardless of the code growth.
82      Phase 2 is an iterative process. During each iteration, we also check
83      if an indirect callsite is promoted and inlined in the profiling run.
84      If yes, vpt will happen to force promote it and in the next iteration,
85      einline will inline the promoted callsite in the next iteration.
86 
87    Phase 3: Annotate control flow graph.
88      AutoFDO uses a separate pass to:
89         * Annotate basic block count
90         * Estimate branch probability
91 
92    After the above 3 phases, all profile is readily annotated on the GCC IR.
93    AutoFDO tries to reuse all FDO infrastructure as much as possible to make
94    use of the profile. E.g. it uses existing mechanism to calculate the basic
95    block/edge frequency, as well as the cgraph node/edge count.
96 */
97 
98 #define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo"
99 #define AUTO_PROFILE_VERSION 1
100 
101 namespace autofdo
102 {
103 
104 /* Intermediate edge info used when propagating AutoFDO profile information.
105    We can't edge->count() directly since it's computed from edge's probability
106    while probability is yet not decided during propagation.  */
107 #define AFDO_EINFO(e)                     ((struct edge_info *) e->aux)
108 class edge_info
109 {
110 public:
edge_info()111   edge_info () : count_ (profile_count::zero ().afdo ()), annotated_ (false) {}
is_annotated()112   bool is_annotated () const { return annotated_; }
set_annotated()113   void set_annotated () { annotated_ = true; }
get_count()114   profile_count get_count () const { return count_; }
set_count(profile_count count)115   void set_count (profile_count count) { count_ = count; }
116 private:
117   profile_count count_;
118   bool annotated_;
119 };
120 
121 /* Represent a source location: (function_decl, lineno).  */
122 typedef std::pair<tree, unsigned> decl_lineno;
123 
124 /* Represent an inline stack. vector[0] is the leaf node.  */
125 typedef auto_vec<decl_lineno> inline_stack;
126 
127 /* String array that stores function names.  */
128 typedef auto_vec<char *> string_vector;
129 
130 /* Map from function name's index in string_table to target's
131    execution count.  */
132 typedef std::map<unsigned, gcov_type> icall_target_map;
133 
134 /* Set of gimple stmts. Used to track if the stmt has already been promoted
135    to direct call.  */
136 typedef std::set<gimple *> stmt_set;
137 
138 /* Represent count info of an inline stack.  */
139 struct count_info
140 {
141   /* Sampled count of the inline stack.  */
142   gcov_type count;
143 
144   /* Map from indirect call target to its sample count.  */
145   icall_target_map targets;
146 
147   /* Whether this inline stack is already used in annotation.
148 
149      Each inline stack should only be used to annotate IR once.
150      This will be enforced when instruction-level discriminator
151      is supported.  */
152   bool annotated;
153 };
154 
155 /* operator< for "const char *".  */
156 struct string_compare
157 {
operatorstring_compare158   bool operator()(const char *a, const char *b) const
159   {
160     return strcmp (a, b) < 0;
161   }
162 };
163 
164 /* Store a string array, indexed by string position in the array.  */
165 class string_table
166 {
167 public:
string_table()168   string_table ()
169   {}
170 
171   ~string_table ();
172 
173   /* For a given string, returns its index.  */
174   int get_index (const char *name) const;
175 
176   /* For a given decl, returns the index of the decl name.  */
177   int get_index_by_decl (tree decl) const;
178 
179   /* For a given index, returns the string.  */
180   const char *get_name (int index) const;
181 
182   /* Read profile, return TRUE on success.  */
183   bool read ();
184 
185 private:
186   typedef std::map<const char *, unsigned, string_compare> string_index_map;
187   string_vector vector_;
188   string_index_map map_;
189 };
190 
191 /* Profile of a function instance:
192      1. total_count of the function.
193      2. head_count (entry basic block count) of the function (only valid when
194         function is a top-level function_instance, i.e. it is the original copy
195         instead of the inlined copy).
196      3. map from source location (decl_lineno) to profile (count_info).
197      4. map from callsite to callee function_instance.  */
198 class function_instance
199 {
200 public:
201   typedef auto_vec<function_instance *> function_instance_stack;
202 
203   /* Read the profile and return a function_instance with head count as
204      HEAD_COUNT. Recursively read callsites to create nested function_instances
205      too. STACK is used to track the recursive creation process.  */
206   static function_instance *
207   read_function_instance (function_instance_stack *stack,
208                           gcov_type head_count);
209 
210   /* Recursively deallocate all callsites (nested function_instances).  */
211   ~function_instance ();
212 
213   /* Accessors.  */
214   int
name()215   name () const
216   {
217     return name_;
218   }
219   gcov_type
total_count()220   total_count () const
221   {
222     return total_count_;
223   }
224   gcov_type
head_count()225   head_count () const
226   {
227     return head_count_;
228   }
229 
230   /* Traverse callsites of the current function_instance to find one at the
231      location of LINENO and callee name represented in DECL.  */
232   function_instance *get_function_instance_by_decl (unsigned lineno,
233                                                     tree decl) const;
234 
235   /* Store the profile info for LOC in INFO. Return TRUE if profile info
236      is found.  */
237   bool get_count_info (location_t loc, count_info *info) const;
238 
239   /* Read the inlined indirect call target profile for STMT and store it in
240      MAP, return the total count for all inlined indirect calls.  */
241   gcov_type find_icall_target_map (gcall *stmt, icall_target_map *map) const;
242 
243   /* Sum of counts that is used during annotation.  */
244   gcov_type total_annotated_count () const;
245 
246   /* Mark LOC as annotated.  */
247   void mark_annotated (location_t loc);
248 
249 private:
250   /* Callsite, represented as (decl_lineno, callee_function_name_index).  */
251   typedef std::pair<unsigned, unsigned> callsite;
252 
253   /* Map from callsite to callee function_instance.  */
254   typedef std::map<callsite, function_instance *> callsite_map;
255 
function_instance(unsigned name,gcov_type head_count)256   function_instance (unsigned name, gcov_type head_count)
257       : name_ (name), total_count_ (0), head_count_ (head_count)
258   {
259   }
260 
261   /* Map from source location (decl_lineno) to profile (count_info).  */
262   typedef std::map<unsigned, count_info> position_count_map;
263 
264   /* function_instance name index in the string_table.  */
265   unsigned name_;
266 
267   /* Total sample count.  */
268   gcov_type total_count_;
269 
270   /* Entry BB's sample count.  */
271   gcov_type head_count_;
272 
273   /* Map from callsite location to callee function_instance.  */
274   callsite_map callsites;
275 
276   /* Map from source location to count_info.  */
277   position_count_map pos_counts;
278 };
279 
280 /* Profile for all functions.  */
281 class autofdo_source_profile
282 {
283 public:
284   static autofdo_source_profile *
create()285   create ()
286   {
287     autofdo_source_profile *map = new autofdo_source_profile ();
288 
289     if (map->read ())
290       return map;
291     delete map;
292     return NULL;
293   }
294 
295   ~autofdo_source_profile ();
296 
297   /* For a given DECL, returns the top-level function_instance.  */
298   function_instance *get_function_instance_by_decl (tree decl) const;
299 
300   /* Find count_info for a given gimple STMT. If found, store the count_info
301      in INFO and return true; otherwise return false.  */
302   bool get_count_info (gimple *stmt, count_info *info) const;
303 
304   /* Find total count of the callee of EDGE.  */
305   gcov_type get_callsite_total_count (struct cgraph_edge *edge) const;
306 
307   /* Update value profile INFO for STMT from the inlined indirect callsite.
308      Return true if INFO is updated.  */
309   bool update_inlined_ind_target (gcall *stmt, count_info *info);
310 
311   /* Mark LOC as annotated.  */
312   void mark_annotated (location_t loc);
313 
314 private:
315   /* Map from function_instance name index (in string_table) to
316      function_instance.  */
317   typedef std::map<unsigned, function_instance *> name_function_instance_map;
318 
autofdo_source_profile()319   autofdo_source_profile () {}
320 
321   /* Read AutoFDO profile and returns TRUE on success.  */
322   bool read ();
323 
324   /* Return the function_instance in the profile that correspond to the
325      inline STACK.  */
326   function_instance *
327   get_function_instance_by_inline_stack (const inline_stack &stack) const;
328 
329   name_function_instance_map map_;
330 };
331 
332 /* Store the strings read from the profile data file.  */
333 static string_table *afdo_string_table;
334 
335 /* Store the AutoFDO source profile.  */
336 static autofdo_source_profile *afdo_source_profile;
337 
338 /* gcov_summary structure to store the profile_info.  */
339 static gcov_summary *afdo_profile_info;
340 
341 /* Helper functions.  */
342 
343 /* Return the original name of NAME: strip the suffix that starts
344    with '.' Caller is responsible for freeing RET.  */
345 
346 static char *
get_original_name(const char * name)347 get_original_name (const char *name)
348 {
349   char *ret = xstrdup (name);
350   char *find = strchr (ret, '.');
351   if (find != NULL)
352     *find = 0;
353   return ret;
354 }
355 
356 /* Return the combined location, which is a 32bit integer in which
357    higher 16 bits stores the line offset of LOC to the start lineno
358    of DECL, The lower 16 bits stores the discriminator.  */
359 
360 static unsigned
get_combined_location(location_t loc,tree decl)361 get_combined_location (location_t loc, tree decl)
362 {
363   /* TODO: allow more bits for line and less bits for discriminator.  */
364   if (LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl) >= (1<<16))
365     warning_at (loc, OPT_Woverflow, "offset exceeds 16 bytes");
366   return ((LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl)) << 16);
367 }
368 
369 /* Return the function decl of a given lexical BLOCK.  */
370 
371 static tree
get_function_decl_from_block(tree block)372 get_function_decl_from_block (tree block)
373 {
374   if (!inlined_function_outer_scope_p (block))
375     return NULL_TREE;
376 
377   return BLOCK_ABSTRACT_ORIGIN (block);
378 }
379 
380 /* Store inline stack for STMT in STACK.  */
381 
382 static void
get_inline_stack(location_t locus,inline_stack * stack)383 get_inline_stack (location_t locus, inline_stack *stack)
384 {
385   if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
386     return;
387 
388   tree block = LOCATION_BLOCK (locus);
389   if (block && TREE_CODE (block) == BLOCK)
390     {
391       int level = 0;
392       for (block = BLOCK_SUPERCONTEXT (block);
393            block && (TREE_CODE (block) == BLOCK);
394            block = BLOCK_SUPERCONTEXT (block))
395         {
396           location_t tmp_locus = BLOCK_SOURCE_LOCATION (block);
397           if (LOCATION_LOCUS (tmp_locus) == UNKNOWN_LOCATION)
398             continue;
399 
400           tree decl = get_function_decl_from_block (block);
401           stack->safe_push (
402               std::make_pair (decl, get_combined_location (locus, decl)));
403           locus = tmp_locus;
404           level++;
405         }
406     }
407   stack->safe_push (
408       std::make_pair (current_function_decl,
409                       get_combined_location (locus, current_function_decl)));
410 }
411 
412 /* Return STMT's combined location, which is a 32bit integer in which
413    higher 16 bits stores the line offset of LOC to the start lineno
414    of DECL, The lower 16 bits stores the discriminator.  */
415 
416 static unsigned
get_relative_location_for_stmt(gimple * stmt)417 get_relative_location_for_stmt (gimple *stmt)
418 {
419   location_t locus = gimple_location (stmt);
420   if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
421     return UNKNOWN_LOCATION;
422 
423   for (tree block = gimple_block (stmt); block && (TREE_CODE (block) == BLOCK);
424        block = BLOCK_SUPERCONTEXT (block))
425     if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) != UNKNOWN_LOCATION)
426       return get_combined_location (locus,
427                                     get_function_decl_from_block (block));
428   return get_combined_location (locus, current_function_decl);
429 }
430 
431 /* Return true if BB contains indirect call.  */
432 
433 static bool
has_indirect_call(basic_block bb)434 has_indirect_call (basic_block bb)
435 {
436   gimple_stmt_iterator gsi;
437 
438   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
439     {
440       gimple *stmt = gsi_stmt (gsi);
441       if (gimple_code (stmt) == GIMPLE_CALL && !gimple_call_internal_p (stmt)
442           && (gimple_call_fn (stmt) == NULL
443               || TREE_CODE (gimple_call_fn (stmt)) != FUNCTION_DECL))
444         return true;
445     }
446   return false;
447 }
448 
449 /* Member functions for string_table.  */
450 
451 /* Deconstructor.  */
452 
~string_table()453 string_table::~string_table ()
454 {
455   for (unsigned i = 0; i < vector_.length (); i++)
456     free (vector_[i]);
457 }
458 
459 
460 /* Return the index of a given function NAME. Return -1 if NAME is not
461    found in string table.  */
462 
463 int
get_index(const char * name)464 string_table::get_index (const char *name) const
465 {
466   if (name == NULL)
467     return -1;
468   string_index_map::const_iterator iter = map_.find (name);
469   if (iter == map_.end ())
470     return -1;
471 
472   return iter->second;
473 }
474 
475 /* Return the index of a given function DECL. Return -1 if DECL is not
476    found in string table.  */
477 
478 int
get_index_by_decl(tree decl)479 string_table::get_index_by_decl (tree decl) const
480 {
481   char *name
482       = get_original_name (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
483   int ret = get_index (name);
484   free (name);
485   if (ret != -1)
486     return ret;
487   ret = get_index (lang_hooks.dwarf_name (decl, 0));
488   if (ret != -1)
489     return ret;
490   if (DECL_FROM_INLINE (decl))
491     return get_index_by_decl (DECL_ABSTRACT_ORIGIN (decl));
492 
493   return -1;
494 }
495 
496 /* Return the function name of a given INDEX.  */
497 
498 const char *
get_name(int index)499 string_table::get_name (int index) const
500 {
501   gcc_assert (index > 0 && index < (int)vector_.length ());
502   return vector_[index];
503 }
504 
505 /* Read the string table. Return TRUE if reading is successful.  */
506 
507 bool
read()508 string_table::read ()
509 {
510   if (gcov_read_unsigned () != GCOV_TAG_AFDO_FILE_NAMES)
511     return false;
512   /* Skip the length of the section.  */
513   gcov_read_unsigned ();
514   /* Read in the file name table.  */
515   unsigned string_num = gcov_read_unsigned ();
516   for (unsigned i = 0; i < string_num; i++)
517     {
518       vector_.safe_push (get_original_name (gcov_read_string ()));
519       map_[vector_.last ()] = i;
520     }
521   return true;
522 }
523 
524 /* Member functions for function_instance.  */
525 
~function_instance()526 function_instance::~function_instance ()
527 {
528   for (callsite_map::iterator iter = callsites.begin ();
529        iter != callsites.end (); ++iter)
530     delete iter->second;
531 }
532 
533 /* Traverse callsites of the current function_instance to find one at the
534    location of LINENO and callee name represented in DECL.  */
535 
536 function_instance *
get_function_instance_by_decl(unsigned lineno,tree decl)537 function_instance::get_function_instance_by_decl (unsigned lineno,
538                                                   tree decl) const
539 {
540   int func_name_idx = afdo_string_table->get_index_by_decl (decl);
541   if (func_name_idx != -1)
542     {
543       callsite_map::const_iterator ret
544           = callsites.find (std::make_pair (lineno, func_name_idx));
545       if (ret != callsites.end ())
546         return ret->second;
547     }
548   func_name_idx
549       = afdo_string_table->get_index (lang_hooks.dwarf_name (decl, 0));
550   if (func_name_idx != -1)
551     {
552       callsite_map::const_iterator ret
553           = callsites.find (std::make_pair (lineno, func_name_idx));
554       if (ret != callsites.end ())
555         return ret->second;
556     }
557   if (DECL_FROM_INLINE (decl))
558     return get_function_instance_by_decl (lineno, DECL_ABSTRACT_ORIGIN (decl));
559 
560   return NULL;
561 }
562 
563 /* Store the profile info for LOC in INFO. Return TRUE if profile info
564    is found.  */
565 
566 bool
get_count_info(location_t loc,count_info * info)567 function_instance::get_count_info (location_t loc, count_info *info) const
568 {
569   position_count_map::const_iterator iter = pos_counts.find (loc);
570   if (iter == pos_counts.end ())
571     return false;
572   *info = iter->second;
573   return true;
574 }
575 
576 /* Mark LOC as annotated.  */
577 
578 void
mark_annotated(location_t loc)579 function_instance::mark_annotated (location_t loc)
580 {
581   position_count_map::iterator iter = pos_counts.find (loc);
582   if (iter == pos_counts.end ())
583     return;
584   iter->second.annotated = true;
585 }
586 
587 /* Read the inlined indirect call target profile for STMT and store it in
588    MAP, return the total count for all inlined indirect calls.  */
589 
590 gcov_type
find_icall_target_map(gcall * stmt,icall_target_map * map)591 function_instance::find_icall_target_map (gcall *stmt,
592                                           icall_target_map *map) const
593 {
594   gcov_type ret = 0;
595   unsigned stmt_offset = get_relative_location_for_stmt (stmt);
596 
597   for (callsite_map::const_iterator iter = callsites.begin ();
598        iter != callsites.end (); ++iter)
599     {
600       unsigned callee = iter->second->name ();
601       /* Check if callsite location match the stmt.  */
602       if (iter->first.first != stmt_offset)
603         continue;
604       struct cgraph_node *node = cgraph_node::get_for_asmname (
605           get_identifier (afdo_string_table->get_name (callee)));
606       if (node == NULL)
607         continue;
608       if (!check_ic_target (stmt, node))
609         continue;
610       (*map)[callee] = iter->second->total_count ();
611       ret += iter->second->total_count ();
612     }
613   return ret;
614 }
615 
616 /* Read the profile and create a function_instance with head count as
617    HEAD_COUNT. Recursively read callsites to create nested function_instances
618    too. STACK is used to track the recursive creation process.  */
619 
620 /* function instance profile format:
621 
622    ENTRY_COUNT: 8 bytes
623    NAME_INDEX: 4 bytes
624    NUM_POS_COUNTS: 4 bytes
625    NUM_CALLSITES: 4 byte
626    POS_COUNT_1:
627      POS_1_OFFSET: 4 bytes
628      NUM_TARGETS: 4 bytes
629      COUNT: 8 bytes
630      TARGET_1:
631        VALUE_PROFILE_TYPE: 4 bytes
632        TARGET_IDX: 8 bytes
633        COUNT: 8 bytes
634      TARGET_2
635      ...
636      TARGET_n
637    POS_COUNT_2
638    ...
639    POS_COUNT_N
640    CALLSITE_1:
641      CALLSITE_1_OFFSET: 4 bytes
642      FUNCTION_INSTANCE_PROFILE (nested)
643    CALLSITE_2
644    ...
645    CALLSITE_n.  */
646 
647 function_instance *
read_function_instance(function_instance_stack * stack,gcov_type head_count)648 function_instance::read_function_instance (function_instance_stack *stack,
649                                            gcov_type head_count)
650 {
651   unsigned name = gcov_read_unsigned ();
652   unsigned num_pos_counts = gcov_read_unsigned ();
653   unsigned num_callsites = gcov_read_unsigned ();
654   function_instance *s = new function_instance (name, head_count);
655   stack->safe_push (s);
656 
657   for (unsigned i = 0; i < num_pos_counts; i++)
658     {
659       unsigned offset = gcov_read_unsigned () & 0xffff0000;
660       unsigned num_targets = gcov_read_unsigned ();
661       gcov_type count = gcov_read_counter ();
662       s->pos_counts[offset].count = count;
663       for (unsigned j = 0; j < stack->length (); j++)
664         (*stack)[j]->total_count_ += count;
665       for (unsigned j = 0; j < num_targets; j++)
666         {
667           /* Only indirect call target histogram is supported now.  */
668           gcov_read_unsigned ();
669           gcov_type target_idx = gcov_read_counter ();
670           s->pos_counts[offset].targets[target_idx] = gcov_read_counter ();
671         }
672     }
673   for (unsigned i = 0; i < num_callsites; i++)
674     {
675       unsigned offset = gcov_read_unsigned ();
676       function_instance *callee_function_instance
677           = read_function_instance (stack, 0);
678       s->callsites[std::make_pair (offset, callee_function_instance->name ())]
679           = callee_function_instance;
680     }
681   stack->pop ();
682   return s;
683 }
684 
685 /* Sum of counts that is used during annotation.  */
686 
687 gcov_type
total_annotated_count()688 function_instance::total_annotated_count () const
689 {
690   gcov_type ret = 0;
691   for (callsite_map::const_iterator iter = callsites.begin ();
692        iter != callsites.end (); ++iter)
693     ret += iter->second->total_annotated_count ();
694   for (position_count_map::const_iterator iter = pos_counts.begin ();
695        iter != pos_counts.end (); ++iter)
696     if (iter->second.annotated)
697       ret += iter->second.count;
698   return ret;
699 }
700 
701 /* Member functions for autofdo_source_profile.  */
702 
~autofdo_source_profile()703 autofdo_source_profile::~autofdo_source_profile ()
704 {
705   for (name_function_instance_map::const_iterator iter = map_.begin ();
706        iter != map_.end (); ++iter)
707     delete iter->second;
708 }
709 
710 /* For a given DECL, returns the top-level function_instance.  */
711 
712 function_instance *
get_function_instance_by_decl(tree decl)713 autofdo_source_profile::get_function_instance_by_decl (tree decl) const
714 {
715   int index = afdo_string_table->get_index_by_decl (decl);
716   if (index == -1)
717     return NULL;
718   name_function_instance_map::const_iterator ret = map_.find (index);
719   return ret == map_.end () ? NULL : ret->second;
720 }
721 
722 /* Find count_info for a given gimple STMT. If found, store the count_info
723    in INFO and return true; otherwise return false.  */
724 
725 bool
get_count_info(gimple * stmt,count_info * info)726 autofdo_source_profile::get_count_info (gimple *stmt, count_info *info) const
727 {
728   if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
729     return false;
730 
731   inline_stack stack;
732   get_inline_stack (gimple_location (stmt), &stack);
733   if (stack.length () == 0)
734     return false;
735   function_instance *s = get_function_instance_by_inline_stack (stack);
736   if (s == NULL)
737     return false;
738   return s->get_count_info (stack[0].second, info);
739 }
740 
741 /* Mark LOC as annotated.  */
742 
743 void
mark_annotated(location_t loc)744 autofdo_source_profile::mark_annotated (location_t loc)
745 {
746   inline_stack stack;
747   get_inline_stack (loc, &stack);
748   if (stack.length () == 0)
749     return;
750   function_instance *s = get_function_instance_by_inline_stack (stack);
751   if (s == NULL)
752     return;
753   s->mark_annotated (stack[0].second);
754 }
755 
756 /* Update value profile INFO for STMT from the inlined indirect callsite.
757    Return true if INFO is updated.  */
758 
759 bool
update_inlined_ind_target(gcall * stmt,count_info * info)760 autofdo_source_profile::update_inlined_ind_target (gcall *stmt,
761                                                    count_info *info)
762 {
763   if (dump_file)
764     {
765       fprintf (dump_file, "Checking indirect call -> direct call ");
766       print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
767     }
768 
769   if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
770     {
771       if (dump_file)
772 	fprintf (dump_file, " good locus\n");
773       return false;
774     }
775 
776   count_info old_info;
777   get_count_info (stmt, &old_info);
778   gcov_type total = 0;
779   for (icall_target_map::const_iterator iter = old_info.targets.begin ();
780        iter != old_info.targets.end (); ++iter)
781     total += iter->second;
782 
783   /* Program behavior changed, original promoted (and inlined) target is not
784      hot any more. Will avoid promote the original target.
785 
786      To check if original promoted target is still hot, we check the total
787      count of the unpromoted targets (stored in TOTAL). If a callsite count
788      (stored in INFO) is smaller than half of the total count, the original
789      promoted target is considered not hot any more.  */
790   if (info->count < total / 2)
791     {
792       if (dump_file)
793 	fprintf (dump_file, " not hot anymore %ld < %ld",
794 		 (long)info->count,
795 		 (long)total /2);
796       return false;
797     }
798 
799   inline_stack stack;
800   get_inline_stack (gimple_location (stmt), &stack);
801   if (stack.length () == 0)
802     {
803       if (dump_file)
804 	fprintf (dump_file, " no inline stack\n");
805       return false;
806     }
807   function_instance *s = get_function_instance_by_inline_stack (stack);
808   if (s == NULL)
809     {
810       if (dump_file)
811 	fprintf (dump_file, " function not found in inline stack\n");
812       return false;
813     }
814   icall_target_map map;
815   if (s->find_icall_target_map (stmt, &map) == 0)
816     {
817       if (dump_file)
818 	fprintf (dump_file, " no target map\n");
819       return false;
820     }
821   for (icall_target_map::const_iterator iter = map.begin ();
822        iter != map.end (); ++iter)
823     info->targets[iter->first] = iter->second;
824   if (dump_file)
825     fprintf (dump_file, " looks good\n");
826   return true;
827 }
828 
829 /* Find total count of the callee of EDGE.  */
830 
831 gcov_type
get_callsite_total_count(struct cgraph_edge * edge)832 autofdo_source_profile::get_callsite_total_count (
833     struct cgraph_edge *edge) const
834 {
835   inline_stack stack;
836   stack.safe_push (std::make_pair (edge->callee->decl, 0));
837   get_inline_stack (gimple_location (edge->call_stmt), &stack);
838 
839   function_instance *s = get_function_instance_by_inline_stack (stack);
840   if (s == NULL
841       || afdo_string_table->get_index (IDENTIFIER_POINTER (
842              DECL_ASSEMBLER_NAME (edge->callee->decl))) != s->name ())
843     return 0;
844 
845   return s->total_count ();
846 }
847 
848 /* Read AutoFDO profile and returns TRUE on success.  */
849 
850 /* source profile format:
851 
852    GCOV_TAG_AFDO_FUNCTION: 4 bytes
853    LENGTH: 4 bytes
854    NUM_FUNCTIONS: 4 bytes
855    FUNCTION_INSTANCE_1
856    FUNCTION_INSTANCE_2
857    ...
858    FUNCTION_INSTANCE_N.  */
859 
860 bool
read()861 autofdo_source_profile::read ()
862 {
863   if (gcov_read_unsigned () != GCOV_TAG_AFDO_FUNCTION)
864     {
865       inform (UNKNOWN_LOCATION, "Not expected TAG.");
866       return false;
867     }
868 
869   /* Skip the length of the section.  */
870   gcov_read_unsigned ();
871 
872   /* Read in the function/callsite profile, and store it in local
873      data structure.  */
874   unsigned function_num = gcov_read_unsigned ();
875   for (unsigned i = 0; i < function_num; i++)
876     {
877       function_instance::function_instance_stack stack;
878       function_instance *s = function_instance::read_function_instance (
879           &stack, gcov_read_counter ());
880       map_[s->name ()] = s;
881     }
882   return true;
883 }
884 
885 /* Return the function_instance in the profile that correspond to the
886    inline STACK.  */
887 
888 function_instance *
get_function_instance_by_inline_stack(const inline_stack & stack)889 autofdo_source_profile::get_function_instance_by_inline_stack (
890     const inline_stack &stack) const
891 {
892   name_function_instance_map::const_iterator iter = map_.find (
893       afdo_string_table->get_index_by_decl (stack[stack.length () - 1].first));
894   if (iter == map_.end())
895     return NULL;
896   function_instance *s = iter->second;
897   for (unsigned i = stack.length() - 1; i > 0; i--)
898     {
899       s = s->get_function_instance_by_decl (
900           stack[i].second, stack[i - 1].first);
901       if (s == NULL)
902         return NULL;
903     }
904   return s;
905 }
906 
907 /* Module profile is only used by LIPO. Here we simply ignore it.  */
908 
909 static void
fake_read_autofdo_module_profile()910 fake_read_autofdo_module_profile ()
911 {
912   /* Read in the module info.  */
913   gcov_read_unsigned ();
914 
915   /* Skip the length of the section.  */
916   gcov_read_unsigned ();
917 
918   /* Read in the file name table.  */
919   unsigned total_module_num = gcov_read_unsigned ();
920   gcc_assert (total_module_num == 0);
921 }
922 
923 /* Read data from profile data file.  */
924 
925 static void
read_profile(void)926 read_profile (void)
927 {
928   if (gcov_open (auto_profile_file, 1) == 0)
929     {
930       error ("cannot open profile file %s", auto_profile_file);
931       return;
932     }
933 
934   if (gcov_read_unsigned () != GCOV_DATA_MAGIC)
935     {
936       error ("AutoFDO profile magic number does not match");
937       return;
938     }
939 
940   /* Skip the version number.  */
941   unsigned version = gcov_read_unsigned ();
942   if (version != AUTO_PROFILE_VERSION)
943     {
944       error ("AutoFDO profile version %u does match %u",
945 	     version, AUTO_PROFILE_VERSION);
946       return;
947     }
948 
949   /* Skip the empty integer.  */
950   gcov_read_unsigned ();
951 
952   /* string_table.  */
953   afdo_string_table = new string_table ();
954   if (!afdo_string_table->read())
955     {
956       error ("cannot read string table from %s", auto_profile_file);
957       return;
958     }
959 
960   /* autofdo_source_profile.  */
961   afdo_source_profile = autofdo_source_profile::create ();
962   if (afdo_source_profile == NULL)
963     {
964       error ("cannot read function profile from %s", auto_profile_file);
965       return;
966     }
967 
968   /* autofdo_module_profile.  */
969   fake_read_autofdo_module_profile ();
970 }
971 
972 /* From AutoFDO profiles, find values inside STMT for that we want to measure
973    histograms for indirect-call optimization.
974 
975    This function is actually served for 2 purposes:
976      * before annotation, we need to mark histogram, promote and inline
977      * after annotation, we just need to mark, and let follow-up logic to
978        decide if it needs to promote and inline.  */
979 
980 static void
afdo_indirect_call(gimple_stmt_iterator * gsi,const icall_target_map & map,bool transform)981 afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map,
982                     bool transform)
983 {
984   gimple *gs = gsi_stmt (*gsi);
985   tree callee;
986 
987   if (map.size () == 0)
988     return;
989   gcall *stmt = dyn_cast <gcall *> (gs);
990   if (!stmt
991       || gimple_call_internal_p (stmt)
992       || gimple_call_fndecl (stmt) != NULL_TREE)
993     return;
994 
995   gcov_type total = 0;
996   icall_target_map::const_iterator max_iter = map.end ();
997 
998   for (icall_target_map::const_iterator iter = map.begin ();
999        iter != map.end (); ++iter)
1000     {
1001       total += iter->second;
1002       if (max_iter == map.end () || max_iter->second < iter->second)
1003         max_iter = iter;
1004     }
1005   struct cgraph_node *direct_call = cgraph_node::get_for_asmname (
1006       get_identifier (afdo_string_table->get_name (max_iter->first)));
1007   if (direct_call == NULL || !direct_call->profile_id)
1008     return;
1009 
1010   callee = gimple_call_fn (stmt);
1011 
1012   histogram_value hist = gimple_alloc_histogram_value (
1013       cfun, HIST_TYPE_INDIR_CALL, stmt, callee);
1014   hist->n_counters = 3;
1015   hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters);
1016   gimple_add_histogram_value (cfun, stmt, hist);
1017 
1018   hist->hvalue.counters[0] = direct_call->profile_id;
1019   hist->hvalue.counters[1] = max_iter->second;
1020   hist->hvalue.counters[2] = total;
1021 
1022   if (!transform)
1023     return;
1024 
1025   struct cgraph_edge *indirect_edge
1026       = cgraph_node::get (current_function_decl)->get_edge (stmt);
1027 
1028   if (dump_file)
1029     {
1030       fprintf (dump_file, "Indirect call -> direct call ");
1031       print_generic_expr (dump_file, callee, TDF_SLIM);
1032       fprintf (dump_file, " => ");
1033       print_generic_expr (dump_file, direct_call->decl, TDF_SLIM);
1034     }
1035 
1036   if (direct_call == NULL || !check_ic_target (stmt, direct_call))
1037     {
1038       if (dump_file)
1039         fprintf (dump_file, " not transforming\n");
1040       return;
1041     }
1042   if (DECL_STRUCT_FUNCTION (direct_call->decl) == NULL)
1043     {
1044       if (dump_file)
1045         fprintf (dump_file, " no declaration\n");
1046       return;
1047     }
1048 
1049   if (dump_file)
1050     {
1051       fprintf (dump_file, " transformation on insn ");
1052       print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1053       fprintf (dump_file, "\n");
1054     }
1055 
1056   /* FIXME: Count should be initialized.  */
1057   struct cgraph_edge *new_edge
1058       = indirect_edge->make_speculative (direct_call,
1059 					 profile_count::uninitialized ());
1060   new_edge->redirect_call_stmt_to_callee ();
1061   gimple_remove_histogram_value (cfun, stmt, hist);
1062   inline_call (new_edge, true, NULL, NULL, false);
1063 }
1064 
1065 /* From AutoFDO profiles, find values inside STMT for that we want to measure
1066    histograms and adds them to list VALUES.  */
1067 
1068 static void
afdo_vpt(gimple_stmt_iterator * gsi,const icall_target_map & map,bool transform)1069 afdo_vpt (gimple_stmt_iterator *gsi, const icall_target_map &map,
1070           bool transform)
1071 {
1072   afdo_indirect_call (gsi, map, transform);
1073 }
1074 
1075 typedef std::set<basic_block> bb_set;
1076 typedef std::set<edge> edge_set;
1077 
1078 static bool
is_bb_annotated(const basic_block bb,const bb_set & annotated)1079 is_bb_annotated (const basic_block bb, const bb_set &annotated)
1080 {
1081   return annotated.find (bb) != annotated.end ();
1082 }
1083 
1084 static void
set_bb_annotated(basic_block bb,bb_set * annotated)1085 set_bb_annotated (basic_block bb, bb_set *annotated)
1086 {
1087   annotated->insert (bb);
1088 }
1089 
1090 /* For a given BB, set its execution count. Attach value profile if a stmt
1091    is not in PROMOTED, because we only want to promote an indirect call once.
1092    Return TRUE if BB is annotated.  */
1093 
1094 static bool
afdo_set_bb_count(basic_block bb,const stmt_set & promoted)1095 afdo_set_bb_count (basic_block bb, const stmt_set &promoted)
1096 {
1097   gimple_stmt_iterator gsi;
1098   edge e;
1099   edge_iterator ei;
1100   gcov_type max_count = 0;
1101   bool has_annotated = false;
1102 
1103   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1104     {
1105       count_info info;
1106       gimple *stmt = gsi_stmt (gsi);
1107       if (gimple_clobber_p (stmt) || is_gimple_debug (stmt))
1108         continue;
1109       if (afdo_source_profile->get_count_info (stmt, &info))
1110         {
1111           if (info.count > max_count)
1112             max_count = info.count;
1113           has_annotated = true;
1114           if (info.targets.size () > 0
1115               && promoted.find (stmt) == promoted.end ())
1116             afdo_vpt (&gsi, info.targets, false);
1117         }
1118     }
1119 
1120   if (!has_annotated)
1121     return false;
1122 
1123   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1124     afdo_source_profile->mark_annotated (gimple_location (gsi_stmt (gsi)));
1125   for (gphi_iterator gpi = gsi_start_phis (bb);
1126        !gsi_end_p (gpi);
1127        gsi_next (&gpi))
1128     {
1129       gphi *phi = gpi.phi ();
1130       size_t i;
1131       for (i = 0; i < gimple_phi_num_args (phi); i++)
1132         afdo_source_profile->mark_annotated (gimple_phi_arg_location (phi, i));
1133     }
1134   FOR_EACH_EDGE (e, ei, bb->succs)
1135   afdo_source_profile->mark_annotated (e->goto_locus);
1136 
1137   bb->count = profile_count::from_gcov_type (max_count).afdo ();
1138   return true;
1139 }
1140 
1141 /* BB1 and BB2 are in an equivalent class iff:
1142    1. BB1 dominates BB2.
1143    2. BB2 post-dominates BB1.
1144    3. BB1 and BB2 are in the same loop nest.
1145    This function finds the equivalent class for each basic block, and
1146    stores a pointer to the first BB in its equivalent class. Meanwhile,
1147    set bb counts for the same equivalent class to be idenical. Update
1148    ANNOTATED_BB for the first BB in its equivalent class.  */
1149 
1150 static void
afdo_find_equiv_class(bb_set * annotated_bb)1151 afdo_find_equiv_class (bb_set *annotated_bb)
1152 {
1153   basic_block bb;
1154 
1155   FOR_ALL_BB_FN (bb, cfun)
1156   bb->aux = NULL;
1157 
1158   FOR_ALL_BB_FN (bb, cfun)
1159   {
1160     vec<basic_block> dom_bbs;
1161     basic_block bb1;
1162     int i;
1163 
1164     if (bb->aux != NULL)
1165       continue;
1166     bb->aux = bb;
1167     dom_bbs = get_dominated_by (CDI_DOMINATORS, bb);
1168     FOR_EACH_VEC_ELT (dom_bbs, i, bb1)
1169       if (bb1->aux == NULL && dominated_by_p (CDI_POST_DOMINATORS, bb, bb1)
1170 	  && bb1->loop_father == bb->loop_father)
1171 	{
1172 	  bb1->aux = bb;
1173 	  if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
1174 	    {
1175 	      bb->count = bb1->count;
1176 	      set_bb_annotated (bb, annotated_bb);
1177 	    }
1178 	}
1179     dom_bbs = get_dominated_by (CDI_POST_DOMINATORS, bb);
1180     FOR_EACH_VEC_ELT (dom_bbs, i, bb1)
1181       if (bb1->aux == NULL && dominated_by_p (CDI_DOMINATORS, bb, bb1)
1182 	  && bb1->loop_father == bb->loop_father)
1183 	{
1184 	  bb1->aux = bb;
1185 	  if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
1186 	    {
1187 	      bb->count = bb1->count;
1188 	      set_bb_annotated (bb, annotated_bb);
1189 	    }
1190 	}
1191   }
1192 }
1193 
1194 /* If a basic block's count is known, and only one of its in/out edges' count
1195    is unknown, its count can be calculated. Meanwhile, if all of the in/out
1196    edges' counts are known, then the basic block's unknown count can also be
1197    calculated.
1198    IS_SUCC is true if out edges of a basic blocks are examined.
1199    Update ANNOTATED_BB accordingly.
1200    Return TRUE if any basic block/edge count is changed.  */
1201 
1202 static bool
afdo_propagate_edge(bool is_succ,bb_set * annotated_bb)1203 afdo_propagate_edge (bool is_succ, bb_set *annotated_bb)
1204 {
1205   basic_block bb;
1206   bool changed = false;
1207 
1208   FOR_EACH_BB_FN (bb, cfun)
1209   {
1210     edge e, unknown_edge = NULL;
1211     edge_iterator ei;
1212     int num_unknown_edge = 0;
1213     profile_count total_known_count = profile_count::zero ().afdo ();
1214 
1215     FOR_EACH_EDGE (e, ei, is_succ ? bb->succs : bb->preds)
1216       {
1217 	gcc_assert (AFDO_EINFO (e) != NULL);
1218 	if (! AFDO_EINFO (e)->is_annotated ())
1219 	  num_unknown_edge++, unknown_edge = e;
1220 	else
1221 	  total_known_count += AFDO_EINFO (e)->get_count ();
1222       }
1223 
1224     /* Be careful not to annotate block with no successor in special cases.  */
1225     if (num_unknown_edge == 0 && total_known_count > bb->count)
1226       {
1227 	bb->count = total_known_count;
1228 	if (!is_bb_annotated (bb, *annotated_bb))
1229 	  set_bb_annotated (bb, annotated_bb);
1230 	changed = true;
1231       }
1232     else if (num_unknown_edge == 1 && is_bb_annotated (bb, *annotated_bb))
1233       {
1234 	if (bb->count > total_known_count)
1235 	  AFDO_EINFO (unknown_edge)->set_count (bb->count - total_known_count);
1236 	else
1237 	  AFDO_EINFO (unknown_edge)->set_count (profile_count::zero().afdo ());
1238 	AFDO_EINFO (unknown_edge)->set_annotated ();
1239 	changed = true;
1240       }
1241   }
1242   return changed;
1243 }
1244 
1245 /* Special propagation for circuit expressions. Because GCC translates
1246    control flow into data flow for circuit expressions. E.g.
1247    BB1:
1248    if (a && b)
1249      BB2
1250    else
1251      BB3
1252 
1253    will be translated into:
1254 
1255    BB1:
1256      if (a)
1257        goto BB.t1
1258      else
1259        goto BB.t3
1260    BB.t1:
1261      if (b)
1262        goto BB.t2
1263      else
1264        goto BB.t3
1265    BB.t2:
1266      goto BB.t3
1267    BB.t3:
1268      tmp = PHI (0 (BB1), 0 (BB.t1), 1 (BB.t2)
1269      if (tmp)
1270        goto BB2
1271      else
1272        goto BB3
1273 
1274    In this case, we need to propagate through PHI to determine the edge
1275    count of BB1->BB.t1, BB.t1->BB.t2.  */
1276 
1277 static void
afdo_propagate_circuit(const bb_set & annotated_bb)1278 afdo_propagate_circuit (const bb_set &annotated_bb)
1279 {
1280   basic_block bb;
1281   FOR_ALL_BB_FN (bb, cfun)
1282   {
1283     gimple *def_stmt;
1284     tree cmp_rhs, cmp_lhs;
1285     gimple *cmp_stmt = last_stmt (bb);
1286     edge e;
1287     edge_iterator ei;
1288 
1289     if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
1290       continue;
1291     cmp_rhs = gimple_cond_rhs (cmp_stmt);
1292     cmp_lhs = gimple_cond_lhs (cmp_stmt);
1293     if (!TREE_CONSTANT (cmp_rhs)
1294         || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1295       continue;
1296     if (TREE_CODE (cmp_lhs) != SSA_NAME)
1297       continue;
1298     if (!is_bb_annotated (bb, annotated_bb))
1299       continue;
1300     def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1301     while (def_stmt && gimple_code (def_stmt) == GIMPLE_ASSIGN
1302            && gimple_assign_single_p (def_stmt)
1303            && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
1304       def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt));
1305     if (!def_stmt)
1306       continue;
1307     gphi *phi_stmt = dyn_cast <gphi *> (def_stmt);
1308     if (!phi_stmt)
1309       continue;
1310     FOR_EACH_EDGE (e, ei, bb->succs)
1311     {
1312       unsigned i, total = 0;
1313       edge only_one;
1314       bool check_value_one = (((integer_onep (cmp_rhs))
1315                                ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1316                               ^ ((e->flags & EDGE_TRUE_VALUE) != 0));
1317       if (! AFDO_EINFO (e)->is_annotated ())
1318         continue;
1319       for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1320         {
1321           tree val = gimple_phi_arg_def (phi_stmt, i);
1322           edge ep = gimple_phi_arg_edge (phi_stmt, i);
1323 
1324           if (!TREE_CONSTANT (val)
1325               || !(integer_zerop (val) || integer_onep (val)))
1326             continue;
1327           if (check_value_one ^ integer_onep (val))
1328             continue;
1329           total++;
1330           only_one = ep;
1331           if (! (AFDO_EINFO (e)->get_count ()).nonzero_p ()
1332 	      && ! AFDO_EINFO (ep)->is_annotated ())
1333 	    {
1334 	      AFDO_EINFO (ep)->set_count (profile_count::zero ().afdo ());
1335 	      AFDO_EINFO (ep)->set_annotated ();
1336 	    }
1337 	}
1338       if (total == 1 && ! AFDO_EINFO (only_one)->is_annotated ())
1339 	{
1340 	  AFDO_EINFO (only_one)->set_count (AFDO_EINFO (e)->get_count ());
1341 	  AFDO_EINFO (only_one)->set_annotated ();
1342 	}
1343     }
1344   }
1345 }
1346 
1347 /* Propagate the basic block count and edge count on the control flow
1348    graph. We do the propagation iteratively until stablize.  */
1349 
1350 static void
afdo_propagate(bb_set * annotated_bb)1351 afdo_propagate (bb_set *annotated_bb)
1352 {
1353   basic_block bb;
1354   bool changed = true;
1355   int i = 0;
1356 
1357   FOR_ALL_BB_FN (bb, cfun)
1358   {
1359     bb->count = ((basic_block)bb->aux)->count;
1360     if (is_bb_annotated ((basic_block)bb->aux, *annotated_bb))
1361       set_bb_annotated (bb, annotated_bb);
1362   }
1363 
1364   while (changed && i++ < 10)
1365     {
1366       changed = false;
1367 
1368       if (afdo_propagate_edge (true, annotated_bb))
1369         changed = true;
1370       if (afdo_propagate_edge (false, annotated_bb))
1371         changed = true;
1372       afdo_propagate_circuit (*annotated_bb);
1373     }
1374 }
1375 
1376 /* Propagate counts on control flow graph and calculate branch
1377    probabilities.  */
1378 
1379 static void
afdo_calculate_branch_prob(bb_set * annotated_bb)1380 afdo_calculate_branch_prob (bb_set *annotated_bb)
1381 {
1382   edge e;
1383   edge_iterator ei;
1384   basic_block bb;
1385 
1386   calculate_dominance_info (CDI_POST_DOMINATORS);
1387   calculate_dominance_info (CDI_DOMINATORS);
1388   loop_optimizer_init (0);
1389 
1390   FOR_ALL_BB_FN (bb, cfun)
1391     {
1392       gcc_assert (bb->aux == NULL);
1393       FOR_EACH_EDGE (e, ei, bb->succs)
1394 	{
1395 	  gcc_assert (e->aux == NULL);
1396 	  e->aux = new edge_info ();
1397 	}
1398     }
1399 
1400   afdo_find_equiv_class (annotated_bb);
1401   afdo_propagate (annotated_bb);
1402 
1403   FOR_EACH_BB_FN (bb, cfun)
1404   {
1405     int num_unknown_succ = 0;
1406     profile_count total_count = profile_count::zero ().afdo ();
1407 
1408     FOR_EACH_EDGE (e, ei, bb->succs)
1409     {
1410       gcc_assert (AFDO_EINFO (e) != NULL);
1411       if (! AFDO_EINFO (e)->is_annotated ())
1412         num_unknown_succ++;
1413       else
1414         total_count += AFDO_EINFO (e)->get_count ();
1415     }
1416     if (num_unknown_succ == 0 && total_count > profile_count::zero ())
1417       {
1418 	FOR_EACH_EDGE (e, ei, bb->succs)
1419 	  e->probability
1420 	    = AFDO_EINFO (e)->get_count ().probability_in (total_count);
1421       }
1422   }
1423   FOR_ALL_BB_FN (bb, cfun)
1424     {
1425       bb->aux = NULL;
1426       FOR_EACH_EDGE (e, ei, bb->succs)
1427 	if (AFDO_EINFO (e) != NULL)
1428 	  {
1429 	    delete AFDO_EINFO (e);
1430 	    e->aux = NULL;
1431 	  }
1432     }
1433 
1434   loop_optimizer_finalize ();
1435   free_dominance_info (CDI_DOMINATORS);
1436   free_dominance_info (CDI_POST_DOMINATORS);
1437 }
1438 
1439 /* Perform value profile transformation using AutoFDO profile. Add the
1440    promoted stmts to PROMOTED_STMTS. Return TRUE if there is any
1441    indirect call promoted.  */
1442 
1443 static bool
afdo_vpt_for_early_inline(stmt_set * promoted_stmts)1444 afdo_vpt_for_early_inline (stmt_set *promoted_stmts)
1445 {
1446   basic_block bb;
1447   if (afdo_source_profile->get_function_instance_by_decl (
1448           current_function_decl) == NULL)
1449     return false;
1450 
1451   compute_fn_summary (cgraph_node::get (current_function_decl), true);
1452 
1453   bool has_vpt = false;
1454   FOR_EACH_BB_FN (bb, cfun)
1455   {
1456     if (!has_indirect_call (bb))
1457       continue;
1458     gimple_stmt_iterator gsi;
1459 
1460     gcov_type bb_count = 0;
1461     for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1462       {
1463         count_info info;
1464 	gimple *stmt = gsi_stmt (gsi);
1465         if (afdo_source_profile->get_count_info (stmt, &info))
1466           bb_count = MAX (bb_count, info.count);
1467       }
1468 
1469     for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1470       {
1471         gcall *stmt = dyn_cast <gcall *> (gsi_stmt (gsi));
1472         /* IC_promotion and early_inline_2 is done in multiple iterations.
1473            No need to promoted the stmt if its in promoted_stmts (means
1474            it is already been promoted in the previous iterations).  */
1475         if ((!stmt) || gimple_call_fn (stmt) == NULL
1476             || TREE_CODE (gimple_call_fn (stmt)) == FUNCTION_DECL
1477             || promoted_stmts->find (stmt) != promoted_stmts->end ())
1478           continue;
1479 
1480         count_info info;
1481         afdo_source_profile->get_count_info (stmt, &info);
1482         info.count = bb_count;
1483         if (afdo_source_profile->update_inlined_ind_target (stmt, &info))
1484           {
1485             /* Promote the indirect call and update the promoted_stmts.  */
1486             promoted_stmts->insert (stmt);
1487             afdo_vpt (&gsi, info.targets, true);
1488             has_vpt = true;
1489           }
1490       }
1491   }
1492 
1493   if (has_vpt)
1494     {
1495       unsigned todo = optimize_inline_calls (current_function_decl);
1496       if (todo & TODO_update_ssa_any)
1497        update_ssa (TODO_update_ssa);
1498       return true;
1499     }
1500 
1501   return false;
1502 }
1503 
1504 /* Annotate auto profile to the control flow graph. Do not annotate value
1505    profile for stmts in PROMOTED_STMTS.  */
1506 
1507 static void
afdo_annotate_cfg(const stmt_set & promoted_stmts)1508 afdo_annotate_cfg (const stmt_set &promoted_stmts)
1509 {
1510   basic_block bb;
1511   bb_set annotated_bb;
1512   const function_instance *s
1513       = afdo_source_profile->get_function_instance_by_decl (
1514           current_function_decl);
1515 
1516   if (s == NULL)
1517     return;
1518   cgraph_node::get (current_function_decl)->count
1519      = profile_count::from_gcov_type (s->head_count ()).afdo ();
1520   ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1521      = profile_count::from_gcov_type (s->head_count ()).afdo ();
1522   EXIT_BLOCK_PTR_FOR_FN (cfun)->count = profile_count::zero ().afdo ();
1523   profile_count max_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1524 
1525   FOR_EACH_BB_FN (bb, cfun)
1526     {
1527       /* As autoFDO uses sampling approach, we have to assume that all
1528 	 counters are zero when not seen by autoFDO.  */
1529       bb->count = profile_count::zero ().afdo ();
1530       if (afdo_set_bb_count (bb, promoted_stmts))
1531 	set_bb_annotated (bb, &annotated_bb);
1532       if (bb->count > max_count)
1533 	max_count = bb->count;
1534     }
1535   if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1536       > ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count)
1537     {
1538       ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count
1539           = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1540       set_bb_annotated (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb, &annotated_bb);
1541     }
1542   if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1543       > EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count)
1544     {
1545       EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count
1546           = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1547       set_bb_annotated (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb, &annotated_bb);
1548     }
1549   afdo_source_profile->mark_annotated (
1550       DECL_SOURCE_LOCATION (current_function_decl));
1551   afdo_source_profile->mark_annotated (cfun->function_start_locus);
1552   afdo_source_profile->mark_annotated (cfun->function_end_locus);
1553   if (max_count > profile_count::zero ())
1554     {
1555       /* Calculate, propagate count and probability information on CFG.  */
1556       afdo_calculate_branch_prob (&annotated_bb);
1557     }
1558   update_max_bb_count ();
1559   profile_status_for_fn (cfun) = PROFILE_READ;
1560   if (flag_value_profile_transformations)
1561     {
1562       gimple_value_profile_transformations ();
1563       free_dominance_info (CDI_DOMINATORS);
1564       free_dominance_info (CDI_POST_DOMINATORS);
1565       update_ssa (TODO_update_ssa);
1566     }
1567 }
1568 
1569 /* Wrapper function to invoke early inliner.  */
1570 
1571 static void
early_inline()1572 early_inline ()
1573 {
1574   compute_fn_summary (cgraph_node::get (current_function_decl), true);
1575   unsigned todo = early_inliner (cfun);
1576   if (todo & TODO_update_ssa_any)
1577     update_ssa (TODO_update_ssa);
1578 }
1579 
1580 /* Use AutoFDO profile to annoate the control flow graph.
1581    Return the todo flag.  */
1582 
1583 static unsigned int
auto_profile(void)1584 auto_profile (void)
1585 {
1586   struct cgraph_node *node;
1587 
1588   if (symtab->state == FINISHED)
1589     return 0;
1590 
1591   init_node_map (true);
1592   profile_info = autofdo::afdo_profile_info;
1593 
1594   FOR_EACH_FUNCTION (node)
1595   {
1596     if (!gimple_has_body_p (node->decl))
1597       continue;
1598 
1599     /* Don't profile functions produced for builtin stuff.  */
1600     if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
1601       continue;
1602 
1603     push_cfun (DECL_STRUCT_FUNCTION (node->decl));
1604 
1605     /* First do indirect call promotion and early inline to make the
1606        IR match the profiled binary before actual annotation.
1607 
1608        This is needed because an indirect call might have been promoted
1609        and inlined in the profiled binary. If we do not promote and
1610        inline these indirect calls before annotation, the profile for
1611        these promoted functions will be lost.
1612 
1613        e.g. foo() --indirect_call--> bar()
1614        In profiled binary, the callsite is promoted and inlined, making
1615        the profile look like:
1616 
1617        foo: {
1618          loc_foo_1: count_1
1619          bar@loc_foo_2: {
1620            loc_bar_1: count_2
1621            loc_bar_2: count_3
1622          }
1623        }
1624 
1625        Before AutoFDO pass, loc_foo_2 is not promoted thus not inlined.
1626        If we perform annotation on it, the profile inside bar@loc_foo2
1627        will be wasted.
1628 
1629        To avoid this, we promote loc_foo_2 and inline the promoted bar
1630        function before annotation, so the profile inside bar@loc_foo2
1631        will be useful.  */
1632     autofdo::stmt_set promoted_stmts;
1633     for (int i = 0; i < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS); i++)
1634       {
1635         if (!flag_value_profile_transformations
1636             || !autofdo::afdo_vpt_for_early_inline (&promoted_stmts))
1637           break;
1638         early_inline ();
1639       }
1640 
1641     early_inline ();
1642     autofdo::afdo_annotate_cfg (promoted_stmts);
1643     compute_function_frequency ();
1644 
1645     /* Local pure-const may imply need to fixup the cfg.  */
1646     if (execute_fixup_cfg () & TODO_cleanup_cfg)
1647       cleanup_tree_cfg ();
1648 
1649     free_dominance_info (CDI_DOMINATORS);
1650     free_dominance_info (CDI_POST_DOMINATORS);
1651     cgraph_edge::rebuild_edges ();
1652     compute_fn_summary (cgraph_node::get (current_function_decl), true);
1653     pop_cfun ();
1654   }
1655 
1656   return TODO_rebuild_cgraph_edges;
1657 }
1658 } /* namespace autofdo.  */
1659 
1660 /* Read the profile from the profile data file.  */
1661 
1662 void
read_autofdo_file(void)1663 read_autofdo_file (void)
1664 {
1665   if (auto_profile_file == NULL)
1666     auto_profile_file = DEFAULT_AUTO_PROFILE_FILE;
1667 
1668   autofdo::afdo_profile_info = XNEW (gcov_summary);
1669   autofdo::afdo_profile_info->runs = 1;
1670   autofdo::afdo_profile_info->sum_max = 0;
1671 
1672   /* Read the profile from the profile file.  */
1673   autofdo::read_profile ();
1674 }
1675 
1676 /* Free the resources.  */
1677 
1678 void
end_auto_profile(void)1679 end_auto_profile (void)
1680 {
1681   delete autofdo::afdo_source_profile;
1682   delete autofdo::afdo_string_table;
1683   profile_info = NULL;
1684 }
1685 
1686 /* Returns TRUE if EDGE is hot enough to be inlined early.  */
1687 
1688 bool
afdo_callsite_hot_enough_for_early_inline(struct cgraph_edge * edge)1689 afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge)
1690 {
1691   gcov_type count
1692       = autofdo::afdo_source_profile->get_callsite_total_count (edge);
1693 
1694   if (count > 0)
1695     {
1696       bool is_hot;
1697       profile_count pcount = profile_count::from_gcov_type (count).afdo ();
1698       gcov_summary *saved_profile_info = profile_info;
1699       /* At early inline stage, profile_info is not set yet. We need to
1700          temporarily set it to afdo_profile_info to calculate hotness.  */
1701       profile_info = autofdo::afdo_profile_info;
1702       is_hot = maybe_hot_count_p (NULL, pcount);
1703       profile_info = saved_profile_info;
1704       return is_hot;
1705     }
1706 
1707   return false;
1708 }
1709 
1710 namespace
1711 {
1712 
1713 const pass_data pass_data_ipa_auto_profile = {
1714   SIMPLE_IPA_PASS, "afdo", /* name */
1715   OPTGROUP_NONE,           /* optinfo_flags */
1716   TV_IPA_AUTOFDO,          /* tv_id */
1717   0,                       /* properties_required */
1718   0,                       /* properties_provided */
1719   0,                       /* properties_destroyed */
1720   0,                       /* todo_flags_start */
1721   0,                       /* todo_flags_finish */
1722 };
1723 
1724 class pass_ipa_auto_profile : public simple_ipa_opt_pass
1725 {
1726 public:
pass_ipa_auto_profile(gcc::context * ctxt)1727   pass_ipa_auto_profile (gcc::context *ctxt)
1728       : simple_ipa_opt_pass (pass_data_ipa_auto_profile, ctxt)
1729   {
1730   }
1731 
1732   /* opt_pass methods: */
1733   virtual bool
gate(function *)1734   gate (function *)
1735   {
1736     return flag_auto_profile;
1737   }
1738   virtual unsigned int
execute(function *)1739   execute (function *)
1740   {
1741     return autofdo::auto_profile ();
1742   }
1743 }; // class pass_ipa_auto_profile
1744 
1745 } // anon namespace
1746 
1747 simple_ipa_opt_pass *
make_pass_ipa_auto_profile(gcc::context * ctxt)1748 make_pass_ipa_auto_profile (gcc::context *ctxt)
1749 {
1750   return new pass_ipa_auto_profile (ctxt);
1751 }
1752