1 /* Read and annotate call graph profile from the auto profile data file.
2 Copyright (C) 2014-2016 Free Software Foundation, Inc.
3 Contributed by Dehao Chen (dehao@google.com)
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #define INCLUDE_MAP
23 #define INCLUDE_SET
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "predict.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "ssa.h"
33 #include "cgraph.h"
34 #include "gcov-io.h"
35 #include "diagnostic-core.h"
36 #include "profile.h"
37 #include "langhooks.h"
38 #include "cfgloop.h"
39 #include "tree-cfg.h"
40 #include "tree-cfgcleanup.h"
41 #include "tree-into-ssa.h"
42 #include "gimple-iterator.h"
43 #include "value-prof.h"
44 #include "params.h"
45 #include "symbol-summary.h"
46 #include "ipa-prop.h"
47 #include "ipa-inline.h"
48 #include "tree-inline.h"
49 #include "auto-profile.h"
50
51 /* The following routines implements AutoFDO optimization.
52
53 This optimization uses sampling profiles to annotate basic block counts
54 and uses heuristics to estimate branch probabilities.
55
56 There are three phases in AutoFDO:
57
58 Phase 1: Read profile from the profile data file.
59 The following info is read from the profile datafile:
60 * string_table: a map between function name and its index.
61 * autofdo_source_profile: a map from function_instance name to
62 function_instance. This is represented as a forest of
63 function_instances.
64 * WorkingSet: a histogram of how many instructions are covered for a
65 given percentage of total cycles. This is describing the binary
66 level information (not source level). This info is used to help
67 decide if we want aggressive optimizations that could increase
68 code footprint (e.g. loop unroll etc.)
69 A function instance is an instance of function that could either be a
70 standalone symbol, or a clone of a function that is inlined into another
71 function.
72
73 Phase 2: Early inline + value profile transformation.
74 Early inline uses autofdo_source_profile to find if a callsite is:
75 * inlined in the profiled binary.
76 * callee body is hot in the profiling run.
77 If both condition satisfies, early inline will inline the callsite
78 regardless of the code growth.
79 Phase 2 is an iterative process. During each iteration, we also check
80 if an indirect callsite is promoted and inlined in the profiling run.
81 If yes, vpt will happen to force promote it and in the next iteration,
82 einline will inline the promoted callsite in the next iteration.
83
84 Phase 3: Annotate control flow graph.
85 AutoFDO uses a separate pass to:
86 * Annotate basic block count
87 * Estimate branch probability
88
89 After the above 3 phases, all profile is readily annotated on the GCC IR.
90 AutoFDO tries to reuse all FDO infrastructure as much as possible to make
91 use of the profile. E.g. it uses existing mechanism to calculate the basic
92 block/edge frequency, as well as the cgraph node/edge count.
93 */
94
95 #define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo"
96 #define AUTO_PROFILE_VERSION 1
97
98 namespace autofdo
99 {
100
101 /* Represent a source location: (function_decl, lineno). */
102 typedef std::pair<tree, unsigned> decl_lineno;
103
104 /* Represent an inline stack. vector[0] is the leaf node. */
105 typedef auto_vec<decl_lineno> inline_stack;
106
107 /* String array that stores function names. */
108 typedef auto_vec<char *> string_vector;
109
110 /* Map from function name's index in string_table to target's
111 execution count. */
112 typedef std::map<unsigned, gcov_type> icall_target_map;
113
114 /* Set of gimple stmts. Used to track if the stmt has already been promoted
115 to direct call. */
116 typedef std::set<gimple *> stmt_set;
117
118 /* Represent count info of an inline stack. */
119 struct count_info
120 {
121 /* Sampled count of the inline stack. */
122 gcov_type count;
123
124 /* Map from indirect call target to its sample count. */
125 icall_target_map targets;
126
127 /* Whether this inline stack is already used in annotation.
128
129 Each inline stack should only be used to annotate IR once.
130 This will be enforced when instruction-level discriminator
131 is supported. */
132 bool annotated;
133 };
134
135 /* operator< for "const char *". */
136 struct string_compare
137 {
operatorstring_compare138 bool operator()(const char *a, const char *b) const
139 {
140 return strcmp (a, b) < 0;
141 }
142 };
143
144 /* Store a string array, indexed by string position in the array. */
145 class string_table
146 {
147 public:
string_table()148 string_table ()
149 {}
150
151 ~string_table ();
152
153 /* For a given string, returns its index. */
154 int get_index (const char *name) const;
155
156 /* For a given decl, returns the index of the decl name. */
157 int get_index_by_decl (tree decl) const;
158
159 /* For a given index, returns the string. */
160 const char *get_name (int index) const;
161
162 /* Read profile, return TRUE on success. */
163 bool read ();
164
165 private:
166 typedef std::map<const char *, unsigned, string_compare> string_index_map;
167 string_vector vector_;
168 string_index_map map_;
169 };
170
171 /* Profile of a function instance:
172 1. total_count of the function.
173 2. head_count (entry basic block count) of the function (only valid when
174 function is a top-level function_instance, i.e. it is the original copy
175 instead of the inlined copy).
176 3. map from source location (decl_lineno) to profile (count_info).
177 4. map from callsite to callee function_instance. */
178 class function_instance
179 {
180 public:
181 typedef auto_vec<function_instance *> function_instance_stack;
182
183 /* Read the profile and return a function_instance with head count as
184 HEAD_COUNT. Recursively read callsites to create nested function_instances
185 too. STACK is used to track the recursive creation process. */
186 static function_instance *
187 read_function_instance (function_instance_stack *stack,
188 gcov_type head_count);
189
190 /* Recursively deallocate all callsites (nested function_instances). */
191 ~function_instance ();
192
193 /* Accessors. */
194 int
name()195 name () const
196 {
197 return name_;
198 }
199 gcov_type
total_count()200 total_count () const
201 {
202 return total_count_;
203 }
204 gcov_type
head_count()205 head_count () const
206 {
207 return head_count_;
208 }
209
210 /* Traverse callsites of the current function_instance to find one at the
211 location of LINENO and callee name represented in DECL. */
212 function_instance *get_function_instance_by_decl (unsigned lineno,
213 tree decl) const;
214
215 /* Store the profile info for LOC in INFO. Return TRUE if profile info
216 is found. */
217 bool get_count_info (location_t loc, count_info *info) const;
218
219 /* Read the inlined indirect call target profile for STMT and store it in
220 MAP, return the total count for all inlined indirect calls. */
221 gcov_type find_icall_target_map (gcall *stmt, icall_target_map *map) const;
222
223 /* Sum of counts that is used during annotation. */
224 gcov_type total_annotated_count () const;
225
226 /* Mark LOC as annotated. */
227 void mark_annotated (location_t loc);
228
229 private:
230 /* Callsite, represented as (decl_lineno, callee_function_name_index). */
231 typedef std::pair<unsigned, unsigned> callsite;
232
233 /* Map from callsite to callee function_instance. */
234 typedef std::map<callsite, function_instance *> callsite_map;
235
function_instance(unsigned name,gcov_type head_count)236 function_instance (unsigned name, gcov_type head_count)
237 : name_ (name), total_count_ (0), head_count_ (head_count)
238 {
239 }
240
241 /* Map from source location (decl_lineno) to profile (count_info). */
242 typedef std::map<unsigned, count_info> position_count_map;
243
244 /* function_instance name index in the string_table. */
245 unsigned name_;
246
247 /* Total sample count. */
248 gcov_type total_count_;
249
250 /* Entry BB's sample count. */
251 gcov_type head_count_;
252
253 /* Map from callsite location to callee function_instance. */
254 callsite_map callsites;
255
256 /* Map from source location to count_info. */
257 position_count_map pos_counts;
258 };
259
260 /* Profile for all functions. */
261 class autofdo_source_profile
262 {
263 public:
264 static autofdo_source_profile *
create()265 create ()
266 {
267 autofdo_source_profile *map = new autofdo_source_profile ();
268
269 if (map->read ())
270 return map;
271 delete map;
272 return NULL;
273 }
274
275 ~autofdo_source_profile ();
276
277 /* For a given DECL, returns the top-level function_instance. */
278 function_instance *get_function_instance_by_decl (tree decl) const;
279
280 /* Find count_info for a given gimple STMT. If found, store the count_info
281 in INFO and return true; otherwise return false. */
282 bool get_count_info (gimple *stmt, count_info *info) const;
283
284 /* Find total count of the callee of EDGE. */
285 gcov_type get_callsite_total_count (struct cgraph_edge *edge) const;
286
287 /* Update value profile INFO for STMT from the inlined indirect callsite.
288 Return true if INFO is updated. */
289 bool update_inlined_ind_target (gcall *stmt, count_info *info);
290
291 /* Mark LOC as annotated. */
292 void mark_annotated (location_t loc);
293
294 private:
295 /* Map from function_instance name index (in string_table) to
296 function_instance. */
297 typedef std::map<unsigned, function_instance *> name_function_instance_map;
298
autofdo_source_profile()299 autofdo_source_profile () {}
300
301 /* Read AutoFDO profile and returns TRUE on success. */
302 bool read ();
303
304 /* Return the function_instance in the profile that correspond to the
305 inline STACK. */
306 function_instance *
307 get_function_instance_by_inline_stack (const inline_stack &stack) const;
308
309 name_function_instance_map map_;
310 };
311
312 /* Store the strings read from the profile data file. */
313 static string_table *afdo_string_table;
314
315 /* Store the AutoFDO source profile. */
316 static autofdo_source_profile *afdo_source_profile;
317
318 /* gcov_ctr_summary structure to store the profile_info. */
319 static struct gcov_ctr_summary *afdo_profile_info;
320
321 /* Helper functions. */
322
323 /* Return the original name of NAME: strip the suffix that starts
324 with '.' Caller is responsible for freeing RET. */
325
326 static char *
get_original_name(const char * name)327 get_original_name (const char *name)
328 {
329 char *ret = xstrdup (name);
330 char *find = strchr (ret, '.');
331 if (find != NULL)
332 *find = 0;
333 return ret;
334 }
335
336 /* Return the combined location, which is a 32bit integer in which
337 higher 16 bits stores the line offset of LOC to the start lineno
338 of DECL, The lower 16 bits stores the discriminator. */
339
340 static unsigned
get_combined_location(location_t loc,tree decl)341 get_combined_location (location_t loc, tree decl)
342 {
343 /* TODO: allow more bits for line and less bits for discriminator. */
344 if (LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl) >= (1<<16))
345 warning_at (loc, OPT_Woverflow, "Offset exceeds 16 bytes.");
346 return ((LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl)) << 16);
347 }
348
349 /* Return the function decl of a given lexical BLOCK. */
350
351 static tree
get_function_decl_from_block(tree block)352 get_function_decl_from_block (tree block)
353 {
354 tree decl;
355
356 if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) == UNKNOWN_LOCATION)
357 return NULL_TREE;
358
359 for (decl = BLOCK_ABSTRACT_ORIGIN (block);
360 decl && (TREE_CODE (decl) == BLOCK);
361 decl = BLOCK_ABSTRACT_ORIGIN (decl))
362 if (TREE_CODE (decl) == FUNCTION_DECL)
363 break;
364 return decl;
365 }
366
367 /* Store inline stack for STMT in STACK. */
368
369 static void
get_inline_stack(location_t locus,inline_stack * stack)370 get_inline_stack (location_t locus, inline_stack *stack)
371 {
372 if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
373 return;
374
375 tree block = LOCATION_BLOCK (locus);
376 if (block && TREE_CODE (block) == BLOCK)
377 {
378 int level = 0;
379 for (block = BLOCK_SUPERCONTEXT (block);
380 block && (TREE_CODE (block) == BLOCK);
381 block = BLOCK_SUPERCONTEXT (block))
382 {
383 location_t tmp_locus = BLOCK_SOURCE_LOCATION (block);
384 if (LOCATION_LOCUS (tmp_locus) == UNKNOWN_LOCATION)
385 continue;
386
387 tree decl = get_function_decl_from_block (block);
388 stack->safe_push (
389 std::make_pair (decl, get_combined_location (locus, decl)));
390 locus = tmp_locus;
391 level++;
392 }
393 }
394 stack->safe_push (
395 std::make_pair (current_function_decl,
396 get_combined_location (locus, current_function_decl)));
397 }
398
399 /* Return STMT's combined location, which is a 32bit integer in which
400 higher 16 bits stores the line offset of LOC to the start lineno
401 of DECL, The lower 16 bits stores the discriminator. */
402
403 static unsigned
get_relative_location_for_stmt(gimple * stmt)404 get_relative_location_for_stmt (gimple *stmt)
405 {
406 location_t locus = gimple_location (stmt);
407 if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
408 return UNKNOWN_LOCATION;
409
410 for (tree block = gimple_block (stmt); block && (TREE_CODE (block) == BLOCK);
411 block = BLOCK_SUPERCONTEXT (block))
412 if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) != UNKNOWN_LOCATION)
413 return get_combined_location (locus,
414 get_function_decl_from_block (block));
415 return get_combined_location (locus, current_function_decl);
416 }
417
418 /* Return true if BB contains indirect call. */
419
420 static bool
has_indirect_call(basic_block bb)421 has_indirect_call (basic_block bb)
422 {
423 gimple_stmt_iterator gsi;
424
425 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
426 {
427 gimple *stmt = gsi_stmt (gsi);
428 if (gimple_code (stmt) == GIMPLE_CALL && !gimple_call_internal_p (stmt)
429 && (gimple_call_fn (stmt) == NULL
430 || TREE_CODE (gimple_call_fn (stmt)) != FUNCTION_DECL))
431 return true;
432 }
433 return false;
434 }
435
436 /* Member functions for string_table. */
437
438 /* Deconstructor. */
439
~string_table()440 string_table::~string_table ()
441 {
442 for (unsigned i = 0; i < vector_.length (); i++)
443 free (vector_[i]);
444 }
445
446
447 /* Return the index of a given function NAME. Return -1 if NAME is not
448 found in string table. */
449
450 int
get_index(const char * name)451 string_table::get_index (const char *name) const
452 {
453 if (name == NULL)
454 return -1;
455 string_index_map::const_iterator iter = map_.find (name);
456 if (iter == map_.end ())
457 return -1;
458
459 return iter->second;
460 }
461
462 /* Return the index of a given function DECL. Return -1 if DECL is not
463 found in string table. */
464
465 int
get_index_by_decl(tree decl)466 string_table::get_index_by_decl (tree decl) const
467 {
468 char *name
469 = get_original_name (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
470 int ret = get_index (name);
471 free (name);
472 if (ret != -1)
473 return ret;
474 ret = get_index (lang_hooks.dwarf_name (decl, 0));
475 if (ret != -1)
476 return ret;
477 if (DECL_ABSTRACT_ORIGIN (decl))
478 return get_index_by_decl (DECL_ABSTRACT_ORIGIN (decl));
479
480 return -1;
481 }
482
483 /* Return the function name of a given INDEX. */
484
485 const char *
get_name(int index)486 string_table::get_name (int index) const
487 {
488 gcc_assert (index > 0 && index < (int)vector_.length ());
489 return vector_[index];
490 }
491
492 /* Read the string table. Return TRUE if reading is successful. */
493
494 bool
read()495 string_table::read ()
496 {
497 if (gcov_read_unsigned () != GCOV_TAG_AFDO_FILE_NAMES)
498 return false;
499 /* Skip the length of the section. */
500 gcov_read_unsigned ();
501 /* Read in the file name table. */
502 unsigned string_num = gcov_read_unsigned ();
503 for (unsigned i = 0; i < string_num; i++)
504 {
505 vector_.safe_push (get_original_name (gcov_read_string ()));
506 map_[vector_.last ()] = i;
507 }
508 return true;
509 }
510
511 /* Member functions for function_instance. */
512
~function_instance()513 function_instance::~function_instance ()
514 {
515 for (callsite_map::iterator iter = callsites.begin ();
516 iter != callsites.end (); ++iter)
517 delete iter->second;
518 }
519
520 /* Traverse callsites of the current function_instance to find one at the
521 location of LINENO and callee name represented in DECL. */
522
523 function_instance *
get_function_instance_by_decl(unsigned lineno,tree decl)524 function_instance::get_function_instance_by_decl (unsigned lineno,
525 tree decl) const
526 {
527 int func_name_idx = afdo_string_table->get_index_by_decl (decl);
528 if (func_name_idx != -1)
529 {
530 callsite_map::const_iterator ret
531 = callsites.find (std::make_pair (lineno, func_name_idx));
532 if (ret != callsites.end ())
533 return ret->second;
534 }
535 func_name_idx
536 = afdo_string_table->get_index (lang_hooks.dwarf_name (decl, 0));
537 if (func_name_idx != -1)
538 {
539 callsite_map::const_iterator ret
540 = callsites.find (std::make_pair (lineno, func_name_idx));
541 if (ret != callsites.end ())
542 return ret->second;
543 }
544 if (DECL_ABSTRACT_ORIGIN (decl))
545 return get_function_instance_by_decl (lineno, DECL_ABSTRACT_ORIGIN (decl));
546
547 return NULL;
548 }
549
550 /* Store the profile info for LOC in INFO. Return TRUE if profile info
551 is found. */
552
553 bool
get_count_info(location_t loc,count_info * info)554 function_instance::get_count_info (location_t loc, count_info *info) const
555 {
556 position_count_map::const_iterator iter = pos_counts.find (loc);
557 if (iter == pos_counts.end ())
558 return false;
559 *info = iter->second;
560 return true;
561 }
562
563 /* Mark LOC as annotated. */
564
565 void
mark_annotated(location_t loc)566 function_instance::mark_annotated (location_t loc)
567 {
568 position_count_map::iterator iter = pos_counts.find (loc);
569 if (iter == pos_counts.end ())
570 return;
571 iter->second.annotated = true;
572 }
573
574 /* Read the inlined indirect call target profile for STMT and store it in
575 MAP, return the total count for all inlined indirect calls. */
576
577 gcov_type
find_icall_target_map(gcall * stmt,icall_target_map * map)578 function_instance::find_icall_target_map (gcall *stmt,
579 icall_target_map *map) const
580 {
581 gcov_type ret = 0;
582 unsigned stmt_offset = get_relative_location_for_stmt (stmt);
583
584 for (callsite_map::const_iterator iter = callsites.begin ();
585 iter != callsites.end (); ++iter)
586 {
587 unsigned callee = iter->second->name ();
588 /* Check if callsite location match the stmt. */
589 if (iter->first.first != stmt_offset)
590 continue;
591 struct cgraph_node *node = cgraph_node::get_for_asmname (
592 get_identifier (afdo_string_table->get_name (callee)));
593 if (node == NULL)
594 continue;
595 if (!check_ic_target (stmt, node))
596 continue;
597 (*map)[callee] = iter->second->total_count ();
598 ret += iter->second->total_count ();
599 }
600 return ret;
601 }
602
603 /* Read the profile and create a function_instance with head count as
604 HEAD_COUNT. Recursively read callsites to create nested function_instances
605 too. STACK is used to track the recursive creation process. */
606
607 /* function instance profile format:
608
609 ENTRY_COUNT: 8 bytes
610 NAME_INDEX: 4 bytes
611 NUM_POS_COUNTS: 4 bytes
612 NUM_CALLSITES: 4 byte
613 POS_COUNT_1:
614 POS_1_OFFSET: 4 bytes
615 NUM_TARGETS: 4 bytes
616 COUNT: 8 bytes
617 TARGET_1:
618 VALUE_PROFILE_TYPE: 4 bytes
619 TARGET_IDX: 8 bytes
620 COUNT: 8 bytes
621 TARGET_2
622 ...
623 TARGET_n
624 POS_COUNT_2
625 ...
626 POS_COUNT_N
627 CALLSITE_1:
628 CALLSITE_1_OFFSET: 4 bytes
629 FUNCTION_INSTANCE_PROFILE (nested)
630 CALLSITE_2
631 ...
632 CALLSITE_n. */
633
634 function_instance *
read_function_instance(function_instance_stack * stack,gcov_type head_count)635 function_instance::read_function_instance (function_instance_stack *stack,
636 gcov_type head_count)
637 {
638 unsigned name = gcov_read_unsigned ();
639 unsigned num_pos_counts = gcov_read_unsigned ();
640 unsigned num_callsites = gcov_read_unsigned ();
641 function_instance *s = new function_instance (name, head_count);
642 stack->safe_push (s);
643
644 for (unsigned i = 0; i < num_pos_counts; i++)
645 {
646 unsigned offset = gcov_read_unsigned () & 0xffff0000;
647 unsigned num_targets = gcov_read_unsigned ();
648 gcov_type count = gcov_read_counter ();
649 s->pos_counts[offset].count = count;
650 for (unsigned j = 0; j < stack->length (); j++)
651 (*stack)[j]->total_count_ += count;
652 for (unsigned j = 0; j < num_targets; j++)
653 {
654 /* Only indirect call target histogram is supported now. */
655 gcov_read_unsigned ();
656 gcov_type target_idx = gcov_read_counter ();
657 s->pos_counts[offset].targets[target_idx] = gcov_read_counter ();
658 }
659 }
660 for (unsigned i = 0; i < num_callsites; i++)
661 {
662 unsigned offset = gcov_read_unsigned ();
663 function_instance *callee_function_instance
664 = read_function_instance (stack, 0);
665 s->callsites[std::make_pair (offset, callee_function_instance->name ())]
666 = callee_function_instance;
667 }
668 stack->pop ();
669 return s;
670 }
671
672 /* Sum of counts that is used during annotation. */
673
674 gcov_type
total_annotated_count()675 function_instance::total_annotated_count () const
676 {
677 gcov_type ret = 0;
678 for (callsite_map::const_iterator iter = callsites.begin ();
679 iter != callsites.end (); ++iter)
680 ret += iter->second->total_annotated_count ();
681 for (position_count_map::const_iterator iter = pos_counts.begin ();
682 iter != pos_counts.end (); ++iter)
683 if (iter->second.annotated)
684 ret += iter->second.count;
685 return ret;
686 }
687
688 /* Member functions for autofdo_source_profile. */
689
~autofdo_source_profile()690 autofdo_source_profile::~autofdo_source_profile ()
691 {
692 for (name_function_instance_map::const_iterator iter = map_.begin ();
693 iter != map_.end (); ++iter)
694 delete iter->second;
695 }
696
697 /* For a given DECL, returns the top-level function_instance. */
698
699 function_instance *
get_function_instance_by_decl(tree decl)700 autofdo_source_profile::get_function_instance_by_decl (tree decl) const
701 {
702 int index = afdo_string_table->get_index_by_decl (decl);
703 if (index == -1)
704 return NULL;
705 name_function_instance_map::const_iterator ret = map_.find (index);
706 return ret == map_.end () ? NULL : ret->second;
707 }
708
709 /* Find count_info for a given gimple STMT. If found, store the count_info
710 in INFO and return true; otherwise return false. */
711
712 bool
get_count_info(gimple * stmt,count_info * info)713 autofdo_source_profile::get_count_info (gimple *stmt, count_info *info) const
714 {
715 if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
716 return false;
717
718 inline_stack stack;
719 get_inline_stack (gimple_location (stmt), &stack);
720 if (stack.length () == 0)
721 return false;
722 function_instance *s = get_function_instance_by_inline_stack (stack);
723 if (s == NULL)
724 return false;
725 return s->get_count_info (stack[0].second, info);
726 }
727
728 /* Mark LOC as annotated. */
729
730 void
mark_annotated(location_t loc)731 autofdo_source_profile::mark_annotated (location_t loc)
732 {
733 inline_stack stack;
734 get_inline_stack (loc, &stack);
735 if (stack.length () == 0)
736 return;
737 function_instance *s = get_function_instance_by_inline_stack (stack);
738 if (s == NULL)
739 return;
740 s->mark_annotated (stack[0].second);
741 }
742
743 /* Update value profile INFO for STMT from the inlined indirect callsite.
744 Return true if INFO is updated. */
745
746 bool
update_inlined_ind_target(gcall * stmt,count_info * info)747 autofdo_source_profile::update_inlined_ind_target (gcall *stmt,
748 count_info *info)
749 {
750 if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
751 return false;
752
753 count_info old_info;
754 get_count_info (stmt, &old_info);
755 gcov_type total = 0;
756 for (icall_target_map::const_iterator iter = old_info.targets.begin ();
757 iter != old_info.targets.end (); ++iter)
758 total += iter->second;
759
760 /* Program behavior changed, original promoted (and inlined) target is not
761 hot any more. Will avoid promote the original target.
762
763 To check if original promoted target is still hot, we check the total
764 count of the unpromoted targets (stored in old_info). If it is no less
765 than half of the callsite count (stored in INFO), the original promoted
766 target is considered not hot any more. */
767 if (total >= info->count / 2)
768 return false;
769
770 inline_stack stack;
771 get_inline_stack (gimple_location (stmt), &stack);
772 if (stack.length () == 0)
773 return false;
774 function_instance *s = get_function_instance_by_inline_stack (stack);
775 if (s == NULL)
776 return false;
777 icall_target_map map;
778 if (s->find_icall_target_map (stmt, &map) == 0)
779 return false;
780 for (icall_target_map::const_iterator iter = map.begin ();
781 iter != map.end (); ++iter)
782 info->targets[iter->first] = iter->second;
783 return true;
784 }
785
786 /* Find total count of the callee of EDGE. */
787
788 gcov_type
get_callsite_total_count(struct cgraph_edge * edge)789 autofdo_source_profile::get_callsite_total_count (
790 struct cgraph_edge *edge) const
791 {
792 inline_stack stack;
793 stack.safe_push (std::make_pair (edge->callee->decl, 0));
794 get_inline_stack (gimple_location (edge->call_stmt), &stack);
795
796 function_instance *s = get_function_instance_by_inline_stack (stack);
797 if (s == NULL
798 || afdo_string_table->get_index (IDENTIFIER_POINTER (
799 DECL_ASSEMBLER_NAME (edge->callee->decl))) != s->name ())
800 return 0;
801
802 return s->total_count ();
803 }
804
805 /* Read AutoFDO profile and returns TRUE on success. */
806
807 /* source profile format:
808
809 GCOV_TAG_AFDO_FUNCTION: 4 bytes
810 LENGTH: 4 bytes
811 NUM_FUNCTIONS: 4 bytes
812 FUNCTION_INSTANCE_1
813 FUNCTION_INSTANCE_2
814 ...
815 FUNCTION_INSTANCE_N. */
816
817 bool
read()818 autofdo_source_profile::read ()
819 {
820 if (gcov_read_unsigned () != GCOV_TAG_AFDO_FUNCTION)
821 {
822 inform (0, "Not expected TAG.");
823 return false;
824 }
825
826 /* Skip the length of the section. */
827 gcov_read_unsigned ();
828
829 /* Read in the function/callsite profile, and store it in local
830 data structure. */
831 unsigned function_num = gcov_read_unsigned ();
832 for (unsigned i = 0; i < function_num; i++)
833 {
834 function_instance::function_instance_stack stack;
835 function_instance *s = function_instance::read_function_instance (
836 &stack, gcov_read_counter ());
837 afdo_profile_info->sum_all += s->total_count ();
838 map_[s->name ()] = s;
839 }
840 return true;
841 }
842
843 /* Return the function_instance in the profile that correspond to the
844 inline STACK. */
845
846 function_instance *
get_function_instance_by_inline_stack(const inline_stack & stack)847 autofdo_source_profile::get_function_instance_by_inline_stack (
848 const inline_stack &stack) const
849 {
850 name_function_instance_map::const_iterator iter = map_.find (
851 afdo_string_table->get_index_by_decl (stack[stack.length () - 1].first));
852 if (iter == map_.end())
853 return NULL;
854 function_instance *s = iter->second;
855 for (unsigned i = stack.length() - 1; i > 0; i--)
856 {
857 s = s->get_function_instance_by_decl (
858 stack[i].second, stack[i - 1].first);
859 if (s == NULL)
860 return NULL;
861 }
862 return s;
863 }
864
865 /* Module profile is only used by LIPO. Here we simply ignore it. */
866
867 static void
fake_read_autofdo_module_profile()868 fake_read_autofdo_module_profile ()
869 {
870 /* Read in the module info. */
871 gcov_read_unsigned ();
872
873 /* Skip the length of the section. */
874 gcov_read_unsigned ();
875
876 /* Read in the file name table. */
877 unsigned total_module_num = gcov_read_unsigned ();
878 gcc_assert (total_module_num == 0);
879 }
880
881 /* Read data from profile data file. */
882
883 static void
read_profile(void)884 read_profile (void)
885 {
886 if (gcov_open (auto_profile_file, 1) == 0)
887 error ("Cannot open profile file %s.", auto_profile_file);
888
889 if (gcov_read_unsigned () != GCOV_DATA_MAGIC)
890 error ("AutoFDO profile magic number does not mathch.");
891
892 /* Skip the version number. */
893 unsigned version = gcov_read_unsigned ();
894 if (version != AUTO_PROFILE_VERSION)
895 error ("AutoFDO profile version %u does match %u.",
896 version, AUTO_PROFILE_VERSION);
897
898 /* Skip the empty integer. */
899 gcov_read_unsigned ();
900
901 /* string_table. */
902 afdo_string_table = new string_table ();
903 if (!afdo_string_table->read())
904 error ("Cannot read string table from %s.", auto_profile_file);
905
906 /* autofdo_source_profile. */
907 afdo_source_profile = autofdo_source_profile::create ();
908 if (afdo_source_profile == NULL)
909 error ("Cannot read function profile from %s.", auto_profile_file);
910
911 /* autofdo_module_profile. */
912 fake_read_autofdo_module_profile ();
913
914 /* Read in the working set. */
915 if (gcov_read_unsigned () != GCOV_TAG_AFDO_WORKING_SET)
916 error ("Cannot read working set from %s.", auto_profile_file);
917
918 /* Skip the length of the section. */
919 gcov_read_unsigned ();
920 gcov_working_set_t set[128];
921 for (unsigned i = 0; i < 128; i++)
922 {
923 set[i].num_counters = gcov_read_unsigned ();
924 set[i].min_counter = gcov_read_counter ();
925 }
926 add_working_set (set);
927 }
928
929 /* From AutoFDO profiles, find values inside STMT for that we want to measure
930 histograms for indirect-call optimization.
931
932 This function is actually served for 2 purposes:
933 * before annotation, we need to mark histogram, promote and inline
934 * after annotation, we just need to mark, and let follow-up logic to
935 decide if it needs to promote and inline. */
936
937 static void
afdo_indirect_call(gimple_stmt_iterator * gsi,const icall_target_map & map,bool transform)938 afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map,
939 bool transform)
940 {
941 gimple *gs = gsi_stmt (*gsi);
942 tree callee;
943
944 if (map.size () == 0)
945 return;
946 gcall *stmt = dyn_cast <gcall *> (gs);
947 if ((!stmt) || gimple_call_fndecl (stmt) != NULL_TREE)
948 return;
949
950 callee = gimple_call_fn (stmt);
951
952 histogram_value hist = gimple_alloc_histogram_value (
953 cfun, HIST_TYPE_INDIR_CALL, stmt, callee);
954 hist->n_counters = 3;
955 hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters);
956 gimple_add_histogram_value (cfun, stmt, hist);
957
958 gcov_type total = 0;
959 icall_target_map::const_iterator max_iter = map.end ();
960
961 for (icall_target_map::const_iterator iter = map.begin ();
962 iter != map.end (); ++iter)
963 {
964 total += iter->second;
965 if (max_iter == map.end () || max_iter->second < iter->second)
966 max_iter = iter;
967 }
968
969 hist->hvalue.counters[0]
970 = (unsigned long long)afdo_string_table->get_name (max_iter->first);
971 hist->hvalue.counters[1] = max_iter->second;
972 hist->hvalue.counters[2] = total;
973
974 if (!transform)
975 return;
976
977 struct cgraph_edge *indirect_edge
978 = cgraph_node::get (current_function_decl)->get_edge (stmt);
979 struct cgraph_node *direct_call = cgraph_node::get_for_asmname (
980 get_identifier ((const char *) hist->hvalue.counters[0]));
981
982 if (direct_call == NULL || !check_ic_target (stmt, direct_call))
983 return;
984 if (DECL_STRUCT_FUNCTION (direct_call->decl) == NULL)
985 return;
986 struct cgraph_edge *new_edge
987 = indirect_edge->make_speculative (direct_call, 0, 0);
988 new_edge->redirect_call_stmt_to_callee ();
989 gimple_remove_histogram_value (cfun, stmt, hist);
990 inline_call (new_edge, true, NULL, NULL, false);
991 }
992
993 /* From AutoFDO profiles, find values inside STMT for that we want to measure
994 histograms and adds them to list VALUES. */
995
996 static void
afdo_vpt(gimple_stmt_iterator * gsi,const icall_target_map & map,bool transform)997 afdo_vpt (gimple_stmt_iterator *gsi, const icall_target_map &map,
998 bool transform)
999 {
1000 afdo_indirect_call (gsi, map, transform);
1001 }
1002
1003 typedef std::set<basic_block> bb_set;
1004 typedef std::set<edge> edge_set;
1005
1006 static bool
is_bb_annotated(const basic_block bb,const bb_set & annotated)1007 is_bb_annotated (const basic_block bb, const bb_set &annotated)
1008 {
1009 return annotated.find (bb) != annotated.end ();
1010 }
1011
1012 static void
set_bb_annotated(basic_block bb,bb_set * annotated)1013 set_bb_annotated (basic_block bb, bb_set *annotated)
1014 {
1015 annotated->insert (bb);
1016 }
1017
1018 static bool
is_edge_annotated(const edge e,const edge_set & annotated)1019 is_edge_annotated (const edge e, const edge_set &annotated)
1020 {
1021 return annotated.find (e) != annotated.end ();
1022 }
1023
1024 static void
set_edge_annotated(edge e,edge_set * annotated)1025 set_edge_annotated (edge e, edge_set *annotated)
1026 {
1027 annotated->insert (e);
1028 }
1029
1030 /* For a given BB, set its execution count. Attach value profile if a stmt
1031 is not in PROMOTED, because we only want to promote an indirect call once.
1032 Return TRUE if BB is annotated. */
1033
1034 static bool
afdo_set_bb_count(basic_block bb,const stmt_set & promoted)1035 afdo_set_bb_count (basic_block bb, const stmt_set &promoted)
1036 {
1037 gimple_stmt_iterator gsi;
1038 edge e;
1039 edge_iterator ei;
1040 gcov_type max_count = 0;
1041 bool has_annotated = false;
1042
1043 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1044 {
1045 count_info info;
1046 gimple *stmt = gsi_stmt (gsi);
1047 if (gimple_clobber_p (stmt) || is_gimple_debug (stmt))
1048 continue;
1049 if (afdo_source_profile->get_count_info (stmt, &info))
1050 {
1051 if (info.count > max_count)
1052 max_count = info.count;
1053 has_annotated = true;
1054 if (info.targets.size () > 0
1055 && promoted.find (stmt) == promoted.end ())
1056 afdo_vpt (&gsi, info.targets, false);
1057 }
1058 }
1059
1060 if (!has_annotated)
1061 return false;
1062
1063 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1064 afdo_source_profile->mark_annotated (gimple_location (gsi_stmt (gsi)));
1065 for (gphi_iterator gpi = gsi_start_phis (bb);
1066 !gsi_end_p (gpi);
1067 gsi_next (&gpi))
1068 {
1069 gphi *phi = gpi.phi ();
1070 size_t i;
1071 for (i = 0; i < gimple_phi_num_args (phi); i++)
1072 afdo_source_profile->mark_annotated (gimple_phi_arg_location (phi, i));
1073 }
1074 FOR_EACH_EDGE (e, ei, bb->succs)
1075 afdo_source_profile->mark_annotated (e->goto_locus);
1076
1077 bb->count = max_count;
1078 return true;
1079 }
1080
1081 /* BB1 and BB2 are in an equivalent class iff:
1082 1. BB1 dominates BB2.
1083 2. BB2 post-dominates BB1.
1084 3. BB1 and BB2 are in the same loop nest.
1085 This function finds the equivalent class for each basic block, and
1086 stores a pointer to the first BB in its equivalent class. Meanwhile,
1087 set bb counts for the same equivalent class to be idenical. Update
1088 ANNOTATED_BB for the first BB in its equivalent class. */
1089
1090 static void
afdo_find_equiv_class(bb_set * annotated_bb)1091 afdo_find_equiv_class (bb_set *annotated_bb)
1092 {
1093 basic_block bb;
1094
1095 FOR_ALL_BB_FN (bb, cfun)
1096 bb->aux = NULL;
1097
1098 FOR_ALL_BB_FN (bb, cfun)
1099 {
1100 vec<basic_block> dom_bbs;
1101 basic_block bb1;
1102 int i;
1103
1104 if (bb->aux != NULL)
1105 continue;
1106 bb->aux = bb;
1107 dom_bbs = get_dominated_by (CDI_DOMINATORS, bb);
1108 FOR_EACH_VEC_ELT (dom_bbs, i, bb1)
1109 if (bb1->aux == NULL && dominated_by_p (CDI_POST_DOMINATORS, bb, bb1)
1110 && bb1->loop_father == bb->loop_father)
1111 {
1112 bb1->aux = bb;
1113 if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
1114 {
1115 bb->count = bb1->count;
1116 set_bb_annotated (bb, annotated_bb);
1117 }
1118 }
1119 dom_bbs = get_dominated_by (CDI_POST_DOMINATORS, bb);
1120 FOR_EACH_VEC_ELT (dom_bbs, i, bb1)
1121 if (bb1->aux == NULL && dominated_by_p (CDI_DOMINATORS, bb, bb1)
1122 && bb1->loop_father == bb->loop_father)
1123 {
1124 bb1->aux = bb;
1125 if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
1126 {
1127 bb->count = bb1->count;
1128 set_bb_annotated (bb, annotated_bb);
1129 }
1130 }
1131 }
1132 }
1133
1134 /* If a basic block's count is known, and only one of its in/out edges' count
1135 is unknown, its count can be calculated. Meanwhile, if all of the in/out
1136 edges' counts are known, then the basic block's unknown count can also be
1137 calculated.
1138 IS_SUCC is true if out edges of a basic blocks are examined.
1139 Update ANNOTATED_BB and ANNOTATED_EDGE accordingly.
1140 Return TRUE if any basic block/edge count is changed. */
1141
1142 static bool
afdo_propagate_edge(bool is_succ,bb_set * annotated_bb,edge_set * annotated_edge)1143 afdo_propagate_edge (bool is_succ, bb_set *annotated_bb,
1144 edge_set *annotated_edge)
1145 {
1146 basic_block bb;
1147 bool changed = false;
1148
1149 FOR_EACH_BB_FN (bb, cfun)
1150 {
1151 edge e, unknown_edge = NULL;
1152 edge_iterator ei;
1153 int num_unknown_edge = 0;
1154 gcov_type total_known_count = 0;
1155
1156 FOR_EACH_EDGE (e, ei, is_succ ? bb->succs : bb->preds)
1157 if (!is_edge_annotated (e, *annotated_edge))
1158 num_unknown_edge++, unknown_edge = e;
1159 else
1160 total_known_count += e->count;
1161
1162 if (num_unknown_edge == 0)
1163 {
1164 if (total_known_count > bb->count)
1165 {
1166 bb->count = total_known_count;
1167 changed = true;
1168 }
1169 if (!is_bb_annotated (bb, *annotated_bb))
1170 {
1171 set_bb_annotated (bb, annotated_bb);
1172 changed = true;
1173 }
1174 }
1175 else if (num_unknown_edge == 1 && is_bb_annotated (bb, *annotated_bb))
1176 {
1177 if (bb->count >= total_known_count)
1178 unknown_edge->count = bb->count - total_known_count;
1179 else
1180 unknown_edge->count = 0;
1181 set_edge_annotated (unknown_edge, annotated_edge);
1182 changed = true;
1183 }
1184 }
1185 return changed;
1186 }
1187
1188 /* Special propagation for circuit expressions. Because GCC translates
1189 control flow into data flow for circuit expressions. E.g.
1190 BB1:
1191 if (a && b)
1192 BB2
1193 else
1194 BB3
1195
1196 will be translated into:
1197
1198 BB1:
1199 if (a)
1200 goto BB.t1
1201 else
1202 goto BB.t3
1203 BB.t1:
1204 if (b)
1205 goto BB.t2
1206 else
1207 goto BB.t3
1208 BB.t2:
1209 goto BB.t3
1210 BB.t3:
1211 tmp = PHI (0 (BB1), 0 (BB.t1), 1 (BB.t2)
1212 if (tmp)
1213 goto BB2
1214 else
1215 goto BB3
1216
1217 In this case, we need to propagate through PHI to determine the edge
1218 count of BB1->BB.t1, BB.t1->BB.t2.
1219 Update ANNOTATED_EDGE accordingly. */
1220
1221 static void
afdo_propagate_circuit(const bb_set & annotated_bb,edge_set * annotated_edge)1222 afdo_propagate_circuit (const bb_set &annotated_bb, edge_set *annotated_edge)
1223 {
1224 basic_block bb;
1225 FOR_ALL_BB_FN (bb, cfun)
1226 {
1227 gimple *def_stmt;
1228 tree cmp_rhs, cmp_lhs;
1229 gimple *cmp_stmt = last_stmt (bb);
1230 edge e;
1231 edge_iterator ei;
1232
1233 if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
1234 continue;
1235 cmp_rhs = gimple_cond_rhs (cmp_stmt);
1236 cmp_lhs = gimple_cond_lhs (cmp_stmt);
1237 if (!TREE_CONSTANT (cmp_rhs)
1238 || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1239 continue;
1240 if (TREE_CODE (cmp_lhs) != SSA_NAME)
1241 continue;
1242 if (!is_bb_annotated (bb, annotated_bb))
1243 continue;
1244 def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1245 while (def_stmt && gimple_code (def_stmt) == GIMPLE_ASSIGN
1246 && gimple_assign_single_p (def_stmt)
1247 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
1248 def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt));
1249 if (!def_stmt)
1250 continue;
1251 gphi *phi_stmt = dyn_cast <gphi *> (def_stmt);
1252 if (!phi_stmt)
1253 continue;
1254 FOR_EACH_EDGE (e, ei, bb->succs)
1255 {
1256 unsigned i, total = 0;
1257 edge only_one;
1258 bool check_value_one = (((integer_onep (cmp_rhs))
1259 ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1260 ^ ((e->flags & EDGE_TRUE_VALUE) != 0));
1261 if (!is_edge_annotated (e, *annotated_edge))
1262 continue;
1263 for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1264 {
1265 tree val = gimple_phi_arg_def (phi_stmt, i);
1266 edge ep = gimple_phi_arg_edge (phi_stmt, i);
1267
1268 if (!TREE_CONSTANT (val)
1269 || !(integer_zerop (val) || integer_onep (val)))
1270 continue;
1271 if (check_value_one ^ integer_onep (val))
1272 continue;
1273 total++;
1274 only_one = ep;
1275 if (e->probability == 0 && !is_edge_annotated (ep, *annotated_edge))
1276 {
1277 ep->probability = 0;
1278 ep->count = 0;
1279 set_edge_annotated (ep, annotated_edge);
1280 }
1281 }
1282 if (total == 1 && !is_edge_annotated (only_one, *annotated_edge))
1283 {
1284 only_one->probability = e->probability;
1285 only_one->count = e->count;
1286 set_edge_annotated (only_one, annotated_edge);
1287 }
1288 }
1289 }
1290 }
1291
1292 /* Propagate the basic block count and edge count on the control flow
1293 graph. We do the propagation iteratively until stablize. */
1294
1295 static void
afdo_propagate(bb_set * annotated_bb,edge_set * annotated_edge)1296 afdo_propagate (bb_set *annotated_bb, edge_set *annotated_edge)
1297 {
1298 basic_block bb;
1299 bool changed = true;
1300 int i = 0;
1301
1302 FOR_ALL_BB_FN (bb, cfun)
1303 {
1304 bb->count = ((basic_block)bb->aux)->count;
1305 if (is_bb_annotated ((const basic_block)bb->aux, *annotated_bb))
1306 set_bb_annotated (bb, annotated_bb);
1307 }
1308
1309 while (changed && i++ < 10)
1310 {
1311 changed = false;
1312
1313 if (afdo_propagate_edge (true, annotated_bb, annotated_edge))
1314 changed = true;
1315 if (afdo_propagate_edge (false, annotated_bb, annotated_edge))
1316 changed = true;
1317 afdo_propagate_circuit (*annotated_bb, annotated_edge);
1318 }
1319 }
1320
1321 /* Propagate counts on control flow graph and calculate branch
1322 probabilities. */
1323
1324 static void
afdo_calculate_branch_prob(bb_set * annotated_bb,edge_set * annotated_edge)1325 afdo_calculate_branch_prob (bb_set *annotated_bb, edge_set *annotated_edge)
1326 {
1327 basic_block bb;
1328 bool has_sample = false;
1329
1330 FOR_EACH_BB_FN (bb, cfun)
1331 {
1332 if (bb->count > 0)
1333 {
1334 has_sample = true;
1335 break;
1336 }
1337 }
1338
1339 if (!has_sample)
1340 return;
1341
1342 calculate_dominance_info (CDI_POST_DOMINATORS);
1343 calculate_dominance_info (CDI_DOMINATORS);
1344 loop_optimizer_init (0);
1345
1346 afdo_find_equiv_class (annotated_bb);
1347 afdo_propagate (annotated_bb, annotated_edge);
1348
1349 FOR_EACH_BB_FN (bb, cfun)
1350 {
1351 edge e;
1352 edge_iterator ei;
1353 int num_unknown_succ = 0;
1354 gcov_type total_count = 0;
1355
1356 FOR_EACH_EDGE (e, ei, bb->succs)
1357 {
1358 if (!is_edge_annotated (e, *annotated_edge))
1359 num_unknown_succ++;
1360 else
1361 total_count += e->count;
1362 }
1363 if (num_unknown_succ == 0 && total_count > 0)
1364 {
1365 FOR_EACH_EDGE (e, ei, bb->succs)
1366 e->probability = (double)e->count * REG_BR_PROB_BASE / total_count;
1367 }
1368 }
1369 FOR_ALL_BB_FN (bb, cfun)
1370 {
1371 edge e;
1372 edge_iterator ei;
1373
1374 FOR_EACH_EDGE (e, ei, bb->succs)
1375 e->count = (double)bb->count * e->probability / REG_BR_PROB_BASE;
1376 bb->aux = NULL;
1377 }
1378
1379 loop_optimizer_finalize ();
1380 free_dominance_info (CDI_DOMINATORS);
1381 free_dominance_info (CDI_POST_DOMINATORS);
1382 }
1383
1384 /* Perform value profile transformation using AutoFDO profile. Add the
1385 promoted stmts to PROMOTED_STMTS. Return TRUE if there is any
1386 indirect call promoted. */
1387
1388 static bool
afdo_vpt_for_early_inline(stmt_set * promoted_stmts)1389 afdo_vpt_for_early_inline (stmt_set *promoted_stmts)
1390 {
1391 basic_block bb;
1392 if (afdo_source_profile->get_function_instance_by_decl (
1393 current_function_decl) == NULL)
1394 return false;
1395
1396 compute_inline_parameters (cgraph_node::get (current_function_decl), true);
1397
1398 bool has_vpt = false;
1399 FOR_EACH_BB_FN (bb, cfun)
1400 {
1401 if (!has_indirect_call (bb))
1402 continue;
1403 gimple_stmt_iterator gsi;
1404
1405 gcov_type bb_count = 0;
1406 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1407 {
1408 count_info info;
1409 gimple *stmt = gsi_stmt (gsi);
1410 if (afdo_source_profile->get_count_info (stmt, &info))
1411 bb_count = MAX (bb_count, info.count);
1412 }
1413
1414 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1415 {
1416 gcall *stmt = dyn_cast <gcall *> (gsi_stmt (gsi));
1417 /* IC_promotion and early_inline_2 is done in multiple iterations.
1418 No need to promoted the stmt if its in promoted_stmts (means
1419 it is already been promoted in the previous iterations). */
1420 if ((!stmt) || gimple_call_fn (stmt) == NULL
1421 || TREE_CODE (gimple_call_fn (stmt)) == FUNCTION_DECL
1422 || promoted_stmts->find (stmt) != promoted_stmts->end ())
1423 continue;
1424
1425 count_info info;
1426 afdo_source_profile->get_count_info (stmt, &info);
1427 info.count = bb_count;
1428 if (afdo_source_profile->update_inlined_ind_target (stmt, &info))
1429 {
1430 /* Promote the indirect call and update the promoted_stmts. */
1431 promoted_stmts->insert (stmt);
1432 afdo_vpt (&gsi, info.targets, true);
1433 has_vpt = true;
1434 }
1435 }
1436 }
1437
1438 if (has_vpt)
1439 {
1440 unsigned todo = optimize_inline_calls (current_function_decl);
1441 if (todo & TODO_update_ssa_any)
1442 update_ssa (TODO_update_ssa);
1443 return true;
1444 }
1445
1446 return false;
1447 }
1448
1449 /* Annotate auto profile to the control flow graph. Do not annotate value
1450 profile for stmts in PROMOTED_STMTS. */
1451
1452 static void
afdo_annotate_cfg(const stmt_set & promoted_stmts)1453 afdo_annotate_cfg (const stmt_set &promoted_stmts)
1454 {
1455 basic_block bb;
1456 bb_set annotated_bb;
1457 edge_set annotated_edge;
1458 const function_instance *s
1459 = afdo_source_profile->get_function_instance_by_decl (
1460 current_function_decl);
1461
1462 if (s == NULL)
1463 return;
1464 cgraph_node::get (current_function_decl)->count = s->head_count ();
1465 ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = s->head_count ();
1466 gcov_type max_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1467
1468 FOR_EACH_BB_FN (bb, cfun)
1469 {
1470 edge e;
1471 edge_iterator ei;
1472
1473 bb->count = 0;
1474 FOR_EACH_EDGE (e, ei, bb->succs)
1475 e->count = 0;
1476
1477 if (afdo_set_bb_count (bb, promoted_stmts))
1478 set_bb_annotated (bb, &annotated_bb);
1479 if (bb->count > max_count)
1480 max_count = bb->count;
1481 }
1482 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1483 > ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count)
1484 {
1485 ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count
1486 = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1487 set_bb_annotated (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb, &annotated_bb);
1488 }
1489 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1490 > EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count)
1491 {
1492 EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count
1493 = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1494 set_bb_annotated (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb, &annotated_bb);
1495 }
1496 afdo_source_profile->mark_annotated (
1497 DECL_SOURCE_LOCATION (current_function_decl));
1498 afdo_source_profile->mark_annotated (cfun->function_start_locus);
1499 afdo_source_profile->mark_annotated (cfun->function_end_locus);
1500 if (max_count > 0)
1501 {
1502 afdo_calculate_branch_prob (&annotated_bb, &annotated_edge);
1503 counts_to_freqs ();
1504 profile_status_for_fn (cfun) = PROFILE_READ;
1505 }
1506 if (flag_value_profile_transformations)
1507 {
1508 gimple_value_profile_transformations ();
1509 free_dominance_info (CDI_DOMINATORS);
1510 free_dominance_info (CDI_POST_DOMINATORS);
1511 update_ssa (TODO_update_ssa);
1512 }
1513 }
1514
1515 /* Wrapper function to invoke early inliner. */
1516
1517 static void
early_inline()1518 early_inline ()
1519 {
1520 compute_inline_parameters (cgraph_node::get (current_function_decl), true);
1521 unsigned todo = early_inliner (cfun);
1522 if (todo & TODO_update_ssa_any)
1523 update_ssa (TODO_update_ssa);
1524 }
1525
1526 /* Use AutoFDO profile to annoate the control flow graph.
1527 Return the todo flag. */
1528
1529 static unsigned int
auto_profile(void)1530 auto_profile (void)
1531 {
1532 struct cgraph_node *node;
1533
1534 if (symtab->state == FINISHED)
1535 return 0;
1536
1537 init_node_map (true);
1538 profile_info = autofdo::afdo_profile_info;
1539
1540 FOR_EACH_FUNCTION (node)
1541 {
1542 if (!gimple_has_body_p (node->decl))
1543 continue;
1544
1545 /* Don't profile functions produced for builtin stuff. */
1546 if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
1547 continue;
1548
1549 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
1550
1551 /* First do indirect call promotion and early inline to make the
1552 IR match the profiled binary before actual annotation.
1553
1554 This is needed because an indirect call might have been promoted
1555 and inlined in the profiled binary. If we do not promote and
1556 inline these indirect calls before annotation, the profile for
1557 these promoted functions will be lost.
1558
1559 e.g. foo() --indirect_call--> bar()
1560 In profiled binary, the callsite is promoted and inlined, making
1561 the profile look like:
1562
1563 foo: {
1564 loc_foo_1: count_1
1565 bar@loc_foo_2: {
1566 loc_bar_1: count_2
1567 loc_bar_2: count_3
1568 }
1569 }
1570
1571 Before AutoFDO pass, loc_foo_2 is not promoted thus not inlined.
1572 If we perform annotation on it, the profile inside bar@loc_foo2
1573 will be wasted.
1574
1575 To avoid this, we promote loc_foo_2 and inline the promoted bar
1576 function before annotation, so the profile inside bar@loc_foo2
1577 will be useful. */
1578 autofdo::stmt_set promoted_stmts;
1579 for (int i = 0; i < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS); i++)
1580 {
1581 if (!flag_value_profile_transformations
1582 || !autofdo::afdo_vpt_for_early_inline (&promoted_stmts))
1583 break;
1584 early_inline ();
1585 }
1586
1587 early_inline ();
1588 autofdo::afdo_annotate_cfg (promoted_stmts);
1589 compute_function_frequency ();
1590
1591 /* Local pure-const may imply need to fixup the cfg. */
1592 if (execute_fixup_cfg () & TODO_cleanup_cfg)
1593 cleanup_tree_cfg ();
1594
1595 free_dominance_info (CDI_DOMINATORS);
1596 free_dominance_info (CDI_POST_DOMINATORS);
1597 cgraph_edge::rebuild_edges ();
1598 compute_inline_parameters (cgraph_node::get (current_function_decl), true);
1599 pop_cfun ();
1600 }
1601
1602 return TODO_rebuild_cgraph_edges;
1603 }
1604 } /* namespace autofdo. */
1605
1606 /* Read the profile from the profile data file. */
1607
1608 void
read_autofdo_file(void)1609 read_autofdo_file (void)
1610 {
1611 if (auto_profile_file == NULL)
1612 auto_profile_file = DEFAULT_AUTO_PROFILE_FILE;
1613
1614 autofdo::afdo_profile_info = (struct gcov_ctr_summary *)xcalloc (
1615 1, sizeof (struct gcov_ctr_summary));
1616 autofdo::afdo_profile_info->runs = 1;
1617 autofdo::afdo_profile_info->sum_max = 0;
1618 autofdo::afdo_profile_info->sum_all = 0;
1619
1620 /* Read the profile from the profile file. */
1621 autofdo::read_profile ();
1622 }
1623
1624 /* Free the resources. */
1625
1626 void
end_auto_profile(void)1627 end_auto_profile (void)
1628 {
1629 delete autofdo::afdo_source_profile;
1630 delete autofdo::afdo_string_table;
1631 profile_info = NULL;
1632 }
1633
1634 /* Returns TRUE if EDGE is hot enough to be inlined early. */
1635
1636 bool
afdo_callsite_hot_enough_for_early_inline(struct cgraph_edge * edge)1637 afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge)
1638 {
1639 gcov_type count
1640 = autofdo::afdo_source_profile->get_callsite_total_count (edge);
1641
1642 if (count > 0)
1643 {
1644 bool is_hot;
1645 const struct gcov_ctr_summary *saved_profile_info = profile_info;
1646 /* At early inline stage, profile_info is not set yet. We need to
1647 temporarily set it to afdo_profile_info to calculate hotness. */
1648 profile_info = autofdo::afdo_profile_info;
1649 is_hot = maybe_hot_count_p (NULL, count);
1650 profile_info = saved_profile_info;
1651 return is_hot;
1652 }
1653
1654 return false;
1655 }
1656
1657 namespace
1658 {
1659
1660 const pass_data pass_data_ipa_auto_profile = {
1661 SIMPLE_IPA_PASS, "afdo", /* name */
1662 OPTGROUP_NONE, /* optinfo_flags */
1663 TV_IPA_AUTOFDO, /* tv_id */
1664 0, /* properties_required */
1665 0, /* properties_provided */
1666 0, /* properties_destroyed */
1667 0, /* todo_flags_start */
1668 0, /* todo_flags_finish */
1669 };
1670
1671 class pass_ipa_auto_profile : public simple_ipa_opt_pass
1672 {
1673 public:
pass_ipa_auto_profile(gcc::context * ctxt)1674 pass_ipa_auto_profile (gcc::context *ctxt)
1675 : simple_ipa_opt_pass (pass_data_ipa_auto_profile, ctxt)
1676 {
1677 }
1678
1679 /* opt_pass methods: */
1680 virtual bool
gate(function *)1681 gate (function *)
1682 {
1683 return flag_auto_profile;
1684 }
1685 virtual unsigned int
execute(function *)1686 execute (function *)
1687 {
1688 return autofdo::auto_profile ();
1689 }
1690 }; // class pass_ipa_auto_profile
1691
1692 } // anon namespace
1693
1694 simple_ipa_opt_pass *
make_pass_ipa_auto_profile(gcc::context * ctxt)1695 make_pass_ipa_auto_profile (gcc::context *ctxt)
1696 {
1697 return new pass_ipa_auto_profile (ctxt);
1698 }
1699