/* Branch prediction routines for the GNU compiler.
   Copyright (C) 2000-2014 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* References:

   [1] "Branch Prediction for Free"
       Ball and Larus; PLDI '93.
   [2] "Static Branch Frequency and Program Profile Analysis"
       Wu and Larus; MICRO-27.
   [3] "Corpus-based Static Branch Prediction"
       Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */


#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "calls.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "insn-config.h"
#include "regs.h"
#include "flags.h"
#include "function.h"
#include "except.h"
#include "diagnostic-core.h"
#include "recog.h"
#include "expr.h"
#include "predict.h"
#include "coverage.h"
#include "sreal.h"
#include "params.h"
#include "target.h"
#include "cfgloop.h"
#include "pointer-set.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-pass.h"
#include "tree-scalar-evolution.h"

/* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
		   1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
	     real_inv_br_prob_base, real_one_half, real_bb_freq_max;

static void combine_predictions_for_insn (rtx, basic_block);
static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
static void predict_paths_leading_to (basic_block, enum br_predictor, enum prediction);
static void predict_paths_leading_to_edge (edge, enum br_predictor, enum prediction);
static bool can_predict_insn_p (const_rtx);

/* Information we hold about each branch predictor.
   Filled using information from predict.def.  */

struct predictor_info
{
  const char *const name;	/* Name used in the debugging dumps.  */
  const int hitrate;		/* Expected hitrate used by
				   predict_insn_def call.  */
  const int flags;
};

/* Use the given predictor without Dempster-Shafer theory if it matches
   using the first_match heuristic.  */
#define PRED_FLAG_FIRST_MATCH 1

/* Convert a hit rate given in percent to our fixed-point representation.  */

#define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
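/* For example, with REG_BR_PROB_BASE at its usual value of 10000,
   HITRATE (50) is 5000 and HITRATE (99) is 9900; the "+ 50" term makes
   the division round to nearest instead of truncating.  */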

#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
static const struct predictor_info predictor_info[]= {
#include "predict.def"

  /* Upper bound on predictors.  */
  {NULL, 0, 0}
};
#undef DEF_PREDICTOR

/* Return TRUE if frequency FREQ is considered to be hot.  */

static inline bool
maybe_hot_frequency_p (struct function *fun, int freq)
{
  struct cgraph_node *node = cgraph_get_node (fun->decl);
  if (!profile_info || !flag_branch_probabilities)
    {
      if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
        return false;
      if (node->frequency == NODE_FREQUENCY_HOT)
        return true;
    }
  if (profile_status_for_fn (fun) == PROFILE_ABSENT)
    return true;
  if (node->frequency == NODE_FREQUENCY_EXECUTED_ONCE
      && freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency * 2 / 3))
    return false;
  if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0)
    return false;
  if (freq < (ENTRY_BLOCK_PTR_FOR_FN (fun)->frequency
	      / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
    return false;
  return true;
}

static gcov_type min_count = -1;

/* Determine the threshold for hot BB counts.  */

gcov_type
get_hot_bb_threshold ()
{
  gcov_working_set_t *ws;
  if (min_count == -1)
    {
      ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
      gcc_assert (ws);
      min_count = ws->min_counter;
    }
  return min_count;
}

/* Set the threshold for hot BB counts.  */

void
set_hot_bb_threshold (gcov_type min)
{
  min_count = min;
}

/* Return TRUE if profile count COUNT is considered to be hot.  */

static inline bool
maybe_hot_count_p (struct function *fun, gcov_type count)
{
  if (fun && profile_status_for_fn (fun) != PROFILE_READ)
    return true;
  /* Code executed at most once is not hot.  */
  if (profile_info->runs >= count)
    return false;
  return (count >= get_hot_bb_threshold ());
}

/* Return true in case BB can be CPU intensive and should be optimized
   for maximal performance.  */

bool
maybe_hot_bb_p (struct function *fun, const_basic_block bb)
{
  gcc_checking_assert (fun);
  if (profile_status_for_fn (fun) == PROFILE_READ)
    return maybe_hot_count_p (fun, bb->count);
  return maybe_hot_frequency_p (fun, bb->frequency);
}

/* Return true if the call can be hot.  */

bool
cgraph_maybe_hot_edge_p (struct cgraph_edge *edge)
{
  if (profile_info && flag_branch_probabilities
      && !maybe_hot_count_p (NULL,
                             edge->count))
    return false;
  if (edge->caller->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED
      || (edge->callee
	  && edge->callee->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED))
    return false;
  if (edge->caller->frequency > NODE_FREQUENCY_UNLIKELY_EXECUTED
      && (edge->callee
	  && edge->callee->frequency <= NODE_FREQUENCY_EXECUTED_ONCE))
    return false;
  if (optimize_size)
    return false;
  if (edge->caller->frequency == NODE_FREQUENCY_HOT)
    return true;
  if (edge->caller->frequency == NODE_FREQUENCY_EXECUTED_ONCE
      && edge->frequency < CGRAPH_FREQ_BASE * 3 / 2)
    return false;
  if (flag_guess_branch_prob)
    {
      if (PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION) == 0
	  || edge->frequency <= (CGRAPH_FREQ_BASE
				 / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)))
        return false;
    }
  return true;
}

/* Return true in case edge E can be CPU intensive and should be optimized
   for maximal performance.  */

bool
maybe_hot_edge_p (edge e)
{
  if (profile_status_for_fn (cfun) == PROFILE_READ)
    return maybe_hot_count_p (cfun, e->count);
  return maybe_hot_frequency_p (cfun, EDGE_FREQUENCY (e));
}



/* Return true if profile COUNT and FREQUENCY, or function FUN static
   node frequency reflects never being executed.  */

static bool
probably_never_executed (struct function *fun,
                         gcov_type count, int frequency)
{
  gcc_checking_assert (fun);
  if (profile_status_for_fn (cfun) == PROFILE_READ)
    {
      int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
      if (count * unlikely_count_fraction >= profile_info->runs)
	return false;
      if (!frequency)
	return true;
      if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency)
	return false;
      if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
	{
          gcov_type computed_count;
          /* Check for possibility of overflow, in which case entry bb count
             is large enough to do the division first without losing much
             precision.  */
	  if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count < REG_BR_PROB_BASE *
	      REG_BR_PROB_BASE)
            {
              gcov_type scaled_count
		= frequency * ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
		  * unlikely_count_fraction;
	      computed_count = RDIV (scaled_count,
				     ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency);
            }
          else
            {
	      computed_count = RDIV (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count,
				     ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency);
              computed_count *= frequency * unlikely_count_fraction;
            }
          if (computed_count >= profile_info->runs)
            return false;
	}
      return true;
    }
  if ((!profile_info || !flag_branch_probabilities)
      && (cgraph_get_node (fun->decl)->frequency
	  == NODE_FREQUENCY_UNLIKELY_EXECUTED))
    return true;
  return false;
}


/* Return true in case BB is probably never executed.  */

bool
probably_never_executed_bb_p (struct function *fun, const_basic_block bb)
{
  return probably_never_executed (fun, bb->count, bb->frequency);
}


/* Return true in case edge E is probably never executed.  */

bool
probably_never_executed_edge_p (struct function *fun, edge e)
{
  return probably_never_executed (fun, e->count, EDGE_FREQUENCY (e));
}

/* Return true if NODE should be optimized for size.  */

bool
cgraph_optimize_for_size_p (struct cgraph_node *node)
{
  if (optimize_size)
    return true;
  if (node && (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED))
    return true;
  else
    return false;
}

/* Return true when function FUN should always be optimized for size.  */

bool
optimize_function_for_size_p (struct function *fun)
{
  if (optimize_size)
    return true;
  if (!fun || !fun->decl)
    return false;
  return cgraph_optimize_for_size_p (cgraph_get_node (fun->decl));
}

/* Return true when function FUN should always be optimized for speed.  */

bool
optimize_function_for_speed_p (struct function *fun)
{
  return !optimize_function_for_size_p (fun);
}

/* Return TRUE when BB should be optimized for size.  */

bool
optimize_bb_for_size_p (const_basic_block bb)
{
  return optimize_function_for_size_p (cfun) || !maybe_hot_bb_p (cfun, bb);
}

/* Return TRUE when BB should be optimized for speed.  */

bool
optimize_bb_for_speed_p (const_basic_block bb)
{
  return !optimize_bb_for_size_p (bb);
}

/* Return TRUE when edge E should be optimized for size.  */

bool
optimize_edge_for_size_p (edge e)
{
  return optimize_function_for_size_p (cfun) || !maybe_hot_edge_p (e);
}

/* Return TRUE when edge E should be optimized for speed.  */

bool
optimize_edge_for_speed_p (edge e)
{
  return !optimize_edge_for_size_p (e);
}

/* Return TRUE when the current instruction should be optimized for size.  */

bool
optimize_insn_for_size_p (void)
{
  return optimize_function_for_size_p (cfun) || !crtl->maybe_hot_insn_p;
}

/* Return TRUE when the current instruction should be optimized for speed.  */

bool
optimize_insn_for_speed_p (void)
{
  return !optimize_insn_for_size_p ();
}

/* Return TRUE when LOOP should be optimized for size.  */

bool
optimize_loop_for_size_p (struct loop *loop)
{
  return optimize_bb_for_size_p (loop->header);
}

/* Return TRUE when LOOP should be optimized for speed.  */

bool
optimize_loop_for_speed_p (struct loop *loop)
{
  return optimize_bb_for_speed_p (loop->header);
}

/* Return TRUE when LOOP nest should be optimized for speed.  */

bool
optimize_loop_nest_for_speed_p (struct loop *loop)
{
  struct loop *l = loop;
  if (optimize_loop_for_speed_p (loop))
    return true;
  l = loop->inner;
  while (l && l != loop)
    {
      if (optimize_loop_for_speed_p (l))
        return true;
      if (l->inner)
        l = l->inner;
      else if (l->next)
        l = l->next;
      else
        {
	  while (l != loop && !l->next)
	    l = loop_outer (l);
	  if (l != loop)
	    l = l->next;
	}
    }
  return false;
}

/* Return TRUE when LOOP nest should be optimized for size.  */

bool
optimize_loop_nest_for_size_p (struct loop *loop)
{
  return !optimize_loop_nest_for_speed_p (loop);
}

/* Return true when edge E is likely to be well predictable by the branch
   predictor.  */
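/* With the default PARAM_PREDICTABLE_BRANCH_OUTCOME of 2 (an assumption;
   see params.def) and REG_BR_PROB_BASE of 10000, this means a probability
   of at most 200 (2%) or at least 9800 (98%).  */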

bool
predictable_edge_p (edge e)
{
  if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
    return false;
  if ((e->probability
       <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100)
      || (REG_BR_PROB_BASE - e->probability
          <= PARAM_VALUE (PARAM_PREDICTABLE_BRANCH_OUTCOME) * REG_BR_PROB_BASE / 100))
    return true;
  return false;
}


/* Set RTL expansion for BB profile.  */

void
rtl_profile_for_bb (basic_block bb)
{
  crtl->maybe_hot_insn_p = maybe_hot_bb_p (cfun, bb);
}

/* Set RTL expansion for edge profile.  */

void
rtl_profile_for_edge (edge e)
{
  crtl->maybe_hot_insn_p = maybe_hot_edge_p (e);
}

/* Set RTL expansion to default mode (i.e. when profile info is not known).  */
void
default_rtl_profile (void)
{
  crtl->maybe_hot_insn_p = true;
}

/* Return true if one of BB's outgoing edges is already predicted by
   PREDICTOR.  */

bool
rtl_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
{
  rtx note;
  if (!INSN_P (BB_END (bb)))
    return false;
  for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED
	&& INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
      return true;
  return false;
}

/* This map contains for a basic block the list of predictions for the
   outgoing edges.  */

static struct pointer_map_t *bb_predictions;

/* Structure representing predictions at the tree level.  */

struct edge_prediction {
    struct edge_prediction *ep_next;
    edge ep_edge;
    enum br_predictor ep_predictor;
    int ep_probability;
};

/* Return true if one of BB's outgoing edges is already predicted by
   PREDICTOR.  */

bool
gimple_predicted_by_p (const_basic_block bb, enum br_predictor predictor)
{
  struct edge_prediction *i;
  void **preds = pointer_map_contains (bb_predictions, bb);

  if (!preds)
    return false;

  for (i = (struct edge_prediction *) *preds; i; i = i->ep_next)
    if (i->ep_predictor == predictor)
      return true;
  return false;
}

/* Return true when the probability of edge is reliable.

   The profile guessing code is good at predicting branch outcome (i.e.
   taken/not taken), which it predicts right slightly over 75% of the time.
   It is however notoriously poor at predicting the probability itself.
   In general the guessed profile appears a lot flatter (with probabilities
   closer to 50%) than the reality, so it is a bad idea to use it to drive
   optimizations such as those disabling dynamic branch prediction for well
   predictable branches.

   There are two exceptions - edges leading to noreturn calls and edges
   predicted by the number of iterations heuristics are predicted well.
   This predicate should be able to distinguish those, but at the moment
   it simply checks for the noreturn heuristic, which is the only one giving
   a probability over 99% or below 1%.  In the future we might want to
   propagate reliability information across the CFG if we find this
   information useful in multiple places.  */
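/* Note that with REG_BR_PROB_BASE at its usual 10000, HITRATE (1) is 100
   and HITRATE (99) is 9900, so a guessed profile is trusted below only
   when the probability is at most 1% or at least 99%.  */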
static bool
probability_reliable_p (int prob)
{
  return (profile_status_for_fn (cfun) == PROFILE_READ
	  || (profile_status_for_fn (cfun) == PROFILE_GUESSED
	      && (prob <= HITRATE (1) || prob >= HITRATE (99))));
}

/* Same predicate as above, working on edges.  */
bool
edge_probability_reliable_p (const_edge e)
{
  return probability_reliable_p (e->probability);
}

/* Same predicate as edge_probability_reliable_p, working on notes.  */
bool
br_prob_note_reliable_p (const_rtx note)
{
  gcc_assert (REG_NOTE_KIND (note) == REG_BR_PROB);
  return probability_reliable_p (XINT (note, 0));
}

static void
predict_insn (rtx insn, enum br_predictor predictor, int probability)
{
  gcc_assert (any_condjump_p (insn));
  if (!flag_guess_branch_prob)
    return;

  add_reg_note (insn, REG_BR_PRED,
		gen_rtx_CONCAT (VOIDmode,
				GEN_INT ((int) predictor),
				GEN_INT ((int) probability)));
}

/* Predict insn by given predictor.  */

void
predict_insn_def (rtx insn, enum br_predictor predictor,
		  enum prediction taken)
{
   int probability = predictor_info[(int) predictor].hitrate;

   if (taken != TAKEN)
     probability = REG_BR_PROB_BASE - probability;

   predict_insn (insn, predictor, probability);
}

/* Predict edge E with given probability if possible.  */

void
rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  rtx last_insn;
  last_insn = BB_END (e->src);

  /* We can store the branch prediction information only about
     conditional jumps.  */
  if (!any_condjump_p (last_insn))
    return;

  /* We always store probability of branching.  */
  if (e->flags & EDGE_FALLTHRU)
    probability = REG_BR_PROB_BASE - probability;

  predict_insn (last_insn, predictor, probability);
}

/* Predict edge E with the given PROBABILITY.  */
void
gimple_predict_edge (edge e, enum br_predictor predictor, int probability)
{
  gcc_assert (profile_status_for_fn (cfun) != PROFILE_GUESSED);
  if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
      && EDGE_COUNT (e->src->succs) > 1
      && flag_guess_branch_prob && optimize)
    {
      struct edge_prediction *i = XNEW (struct edge_prediction);
      void **preds = pointer_map_insert (bb_predictions, e->src);

      i->ep_next = (struct edge_prediction *) *preds;
      *preds = i;
      i->ep_probability = probability;
      i->ep_predictor = predictor;
      i->ep_edge = e;
    }
}

/* Remove all predictions on given basic block that are attached
   to edge E.  */
void
remove_predictions_associated_with_edge (edge e)
{
  void **preds;

  if (!bb_predictions)
    return;

  preds = pointer_map_contains (bb_predictions, e->src);

  if (preds)
    {
      struct edge_prediction **prediction = (struct edge_prediction **) preds;
      struct edge_prediction *next;

      while (*prediction)
	{
	  if ((*prediction)->ep_edge == e)
	    {
	      next = (*prediction)->ep_next;
	      free (*prediction);
	      *prediction = next;
	    }
	  else
	    prediction = &((*prediction)->ep_next);
	}
    }
}

/* Clears the list of predictions stored for BB.  */

static void
clear_bb_predictions (basic_block bb)
{
  void **preds = pointer_map_contains (bb_predictions, bb);
  struct edge_prediction *pred, *next;

  if (!preds)
    return;

  for (pred = (struct edge_prediction *) *preds; pred; pred = next)
    {
      next = pred->ep_next;
      free (pred);
    }
  *preds = NULL;
}

/* Return true when we can store prediction on insn INSN.
   At the moment we represent predictions only on conditional
   jumps, not on computed jumps or other complicated cases.  */
static bool
can_predict_insn_p (const_rtx insn)
{
  return (JUMP_P (insn)
	  && any_condjump_p (insn)
	  && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
}

/* Predict edge E by given predictor if possible.  */

void
predict_edge_def (edge e, enum br_predictor predictor,
		  enum prediction taken)
{
   int probability = predictor_info[(int) predictor].hitrate;

   if (taken != TAKEN)
     probability = REG_BR_PROB_BASE - probability;

   predict_edge (e, predictor, probability);
}

/* Invert all branch predictions or probability notes in the INSN.  This needs
   to be done each time we invert the condition used by the jump.  */

void
invert_br_probabilities (rtx insn)
{
  rtx note;

  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PROB)
      XINT (note, 0) = REG_BR_PROB_BASE - XINT (note, 0);
    else if (REG_NOTE_KIND (note) == REG_BR_PRED)
      XEXP (XEXP (note, 0), 1)
	= GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
}

/* Dump information about the branch prediction to the output file.  */

static void
dump_prediction (FILE *file, enum br_predictor predictor, int probability,
		 basic_block bb, int used)
{
  edge e;
  edge_iterator ei;

  if (!file)
    return;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (! (e->flags & EDGE_FALLTHRU))
      break;

  fprintf (file, "  %s heuristics%s: %.1f%%",
	   predictor_info[predictor].name,
	   used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);

  if (bb->count)
    {
      fprintf (file, "  exec ");
      fprintf (file, HOST_WIDEST_INT_PRINT_DEC, bb->count);
      if (e)
	{
	  fprintf (file, " hit ");
	  fprintf (file, HOST_WIDEST_INT_PRINT_DEC, e->count);
	  fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
	}
    }

  fprintf (file, "\n");
}

/* We cannot predict the probabilities of outgoing edges of BB.  Set them
   evenly and hope for the best.  */
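/* For example (with REG_BR_PROB_BASE at its usual 10000), a block with
   three non-EH successors gets (10000 + 1) / 3 == 3333 on each of them;
   the "+ nedges / 2" term rounds the division to nearest.  */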
static void
set_even_probabilities (basic_block bb)
{
  int nedges = 0;
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      nedges ++;
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
    else
      e->probability = 0;
}

/* Combine all REG_BR_PRED notes into a single probability and attach the
   REG_BR_PROB note if not already present.  Remove now useless REG_BR_PRED
   notes.  */

static void
combine_predictions_for_insn (rtx insn, basic_block bb)
{
  rtx prob_note;
  rtx *pnote;
  rtx note;
  int best_probability = PROB_EVEN;
  enum br_predictor best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;

  if (!can_predict_insn_p (insn))
    {
      set_even_probabilities (bb);
      return;
    }

  prob_note = find_reg_note (insn, REG_BR_PROB, 0);
  pnote = &REG_NOTES (insn);
  if (dump_file)
    fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
	     bb->index);

  /* We implement "first match" heuristics and use probability guessed
     by predictor with smallest index.  */
  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    if (REG_NOTE_KIND (note) == REG_BR_PRED)
      {
	enum br_predictor predictor = ((enum br_predictor)
				       INTVAL (XEXP (XEXP (note, 0), 0)));
	int probability = INTVAL (XEXP (XEXP (note, 0), 1));

	found = true;
	if (best_predictor > predictor)
	  best_probability = probability, best_predictor = predictor;

	d = (combined_probability * probability
	     + (REG_BR_PROB_BASE - combined_probability)
	     * (REG_BR_PROB_BASE - probability));

	/* Use FP math to avoid overflows of 32-bit integers.  */
	if (d == 0)
	  /* If one probability is 0% and one 100%, avoid division by zero.  */
	  combined_probability = REG_BR_PROB_BASE / 2;
	else
	  combined_probability = (((double) combined_probability) * probability
				  * REG_BR_PROB_BASE / d + 0.5);
      }
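  /* A worked example of the combination above, assuming REG_BR_PROB_BASE
     is 10000: combining two agreeing predictions of 7000 gives
     d = 7000 * 7000 + 3000 * 3000 = 58000000, and the combined probability
     becomes 7000 * 7000 * 10000 / 58000000 + 0.5, about 8448 - two agreeing
     heuristics reinforce each other beyond either one alone.  */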

  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */

  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION,
		     combined_probability, bb, true);
  else
    {
      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
		       bb, !first_match);
      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
		       bb, first_match);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);

  while (*pnote)
    {
      if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
	{
	  enum br_predictor predictor = ((enum br_predictor)
					 INTVAL (XEXP (XEXP (*pnote, 0), 0)));
	  int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));

	  dump_prediction (dump_file, predictor, probability, bb,
			   !first_match || best_predictor == predictor);
	  *pnote = XEXP (*pnote, 1);
	}
      else
	pnote = &XEXP (*pnote, 1);
    }

  if (!prob_note)
    {
      add_int_reg_note (insn, REG_BR_PROB, combined_probability);

      /* Save the prediction into CFG in case we are seeing a non-degenerate
	 conditional jump.  */
      if (!single_succ_p (bb))
	{
	  BRANCH_EDGE (bb)->probability = combined_probability;
	  FALLTHRU_EDGE (bb)->probability
	    = REG_BR_PROB_BASE - combined_probability;
	}
    }
  else if (!single_succ_p (bb))
    {
      int prob = XINT (prob_note, 0);

      BRANCH_EDGE (bb)->probability = prob;
      FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
    }
  else
    single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
}

/* Combine predictions into a single probability and store them into the CFG.
   Remove now useless prediction entries.  */

static void
combine_predictions_for_bb (basic_block bb)
{
  int best_probability = PROB_EVEN;
  enum br_predictor best_predictor = END_PREDICTORS;
  int combined_probability = REG_BR_PROB_BASE / 2;
  int d;
  bool first_match = false;
  bool found = false;
  struct edge_prediction *pred;
  int nedges = 0;
  edge e, first = NULL, second = NULL;
  edge_iterator ei;
  void **preds;

  FOR_EACH_EDGE (e, ei, bb->succs)
    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
      {
	nedges ++;
	if (first && !second)
	  second = e;
	if (!first)
	  first = e;
      }

  /* When there is no successor or only one choice, prediction is easy.

     We are lazy for now and predict only basic blocks with two outgoing
     edges.  It is possible to predict generic case too, but we have to
     ignore first match heuristics and do more involved combining.  Implement
     this later.  */
  if (nedges != 2)
    {
      if (!bb->count)
	set_even_probabilities (bb);
      clear_bb_predictions (bb);
      if (dump_file)
	fprintf (dump_file, "%i edges in bb %i predicted to even probabilities\n",
		 nedges, bb->index);
      return;
    }

  if (dump_file)
    fprintf (dump_file, "Predictions for bb %i\n", bb->index);

  preds = pointer_map_contains (bb_predictions, bb);
  if (preds)
    {
      /* We implement "first match" heuristics and use probability guessed
	 by predictor with smallest index.  */
      for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
	{
	  enum br_predictor predictor = pred->ep_predictor;
	  int probability = pred->ep_probability;

	  if (pred->ep_edge != first)
	    probability = REG_BR_PROB_BASE - probability;

	  found = true;
	  /* First match heuristics would be wildly confused if we predicted
	     both directions.  */
	  if (best_predictor > predictor)
	    {
              struct edge_prediction *pred2;
	      int prob = probability;

	      for (pred2 = (struct edge_prediction *) *preds;
		   pred2; pred2 = pred2->ep_next)
	       if (pred2 != pred && pred2->ep_predictor == pred->ep_predictor)
	         {
	           int probability2 = pred2->ep_probability;

		   if (pred2->ep_edge != first)
		     probability2 = REG_BR_PROB_BASE - probability2;

		   if ((probability < REG_BR_PROB_BASE / 2) !=
		       (probability2 < REG_BR_PROB_BASE / 2))
		     break;

		   /* If the same predictor later gave better result, go for it! */
		   if ((probability >= REG_BR_PROB_BASE / 2 && (probability2 > probability))
		       || (probability <= REG_BR_PROB_BASE / 2 && (probability2 < probability)))
		     prob = probability2;
		 }
	      if (!pred2)
	        best_probability = prob, best_predictor = predictor;
	    }

	  d = (combined_probability * probability
	       + (REG_BR_PROB_BASE - combined_probability)
	       * (REG_BR_PROB_BASE - probability));

	  /* Use FP math to avoid overflows of 32-bit integers.  */
	  if (d == 0)
	    /* If one probability is 0% and one 100%, avoid division by zero.  */
	    combined_probability = REG_BR_PROB_BASE / 2;
	  else
	    combined_probability = (((double) combined_probability)
				    * probability
				    * REG_BR_PROB_BASE / d + 0.5);
	}
    }

  /* Decide which heuristic to use.  In case we didn't match anything,
     use no_prediction heuristic, in case we did match, use either
     first match or Dempster-Shafer theory depending on the flags.  */

  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
    first_match = true;

  if (!found)
    dump_prediction (dump_file, PRED_NO_PREDICTION, combined_probability, bb, true);
  else
    {
      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability, bb,
		       !first_match);
      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability, bb,
		       first_match);
    }

  if (first_match)
    combined_probability = best_probability;
  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);

  if (preds)
    {
      for (pred = (struct edge_prediction *) *preds; pred; pred = pred->ep_next)
	{
	  enum br_predictor predictor = pred->ep_predictor;
	  int probability = pred->ep_probability;

	  if (pred->ep_edge != EDGE_SUCC (bb, 0))
	    probability = REG_BR_PROB_BASE - probability;
	  dump_prediction (dump_file, predictor, probability, bb,
			   !first_match || best_predictor == predictor);
	}
    }
  clear_bb_predictions (bb);

  if (!bb->count)
    {
      first->probability = combined_probability;
      second->probability = REG_BR_PROB_BASE - combined_probability;
    }
}

/* Check if T1 and T2 satisfy the IV_COMPARE condition.
   Return the SSA_NAME if the condition is satisfied, NULL otherwise.

   T1 and T2 should be one of the following cases:
     1. T1 is SSA_NAME, T2 is NULL
     2. T1 is SSA_NAME, T2 is INTEGER_CST between [-4, 4]
     3. T2 is SSA_NAME, T1 is INTEGER_CST between [-4, 4]  */
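/* For example (operand names hypothetical), t1 == i_3 with t2 == 2 yields
   i_3, while t1 == 5 with t2 == i_3 yields NULL because the constant lies
   outside [-4, 4].  */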

static tree
strips_small_constant (tree t1, tree t2)
{
  tree ret = NULL;
  int value = 0;

  if (!t1)
    return NULL;
  else if (TREE_CODE (t1) == SSA_NAME)
    ret = t1;
  else if (tree_fits_shwi_p (t1))
    value = tree_to_shwi (t1);
  else
    return NULL;

  if (!t2)
    return ret;
  else if (tree_fits_shwi_p (t2))
    value = tree_to_shwi (t2);
  else if (TREE_CODE (t2) == SSA_NAME)
    {
      if (ret)
        return NULL;
      else
        ret = t2;
    }

  if (value <= 4 && value >= -4)
    return ret;
  else
    return NULL;
}

/* Return the SSA_NAME in T or T's operands.
   Return NULL if SSA_NAME cannot be found.  */

static tree
get_base_value (tree t)
{
  if (TREE_CODE (t) == SSA_NAME)
    return t;

  if (!BINARY_CLASS_P (t))
    return NULL;

  switch (TREE_OPERAND_LENGTH (t))
    {
    case 1:
      return strips_small_constant (TREE_OPERAND (t, 0), NULL);
    case 2:
      return strips_small_constant (TREE_OPERAND (t, 0),
				    TREE_OPERAND (t, 1));
    default:
      return NULL;
    }
}

/* Check the compare STMT in LOOP.  If it compares an induction
   variable to a loop invariant, return true, and save
   LOOP_INVARIANT, COMPARE_CODE, LOOP_STEP and LOOP_IV_BASE.
   Otherwise return false and set LOOP_INVARIANT to NULL.  */

static bool
is_comparison_with_loop_invariant_p (gimple stmt, struct loop *loop,
				     tree *loop_invariant,
				     enum tree_code *compare_code,
				     tree *loop_step,
				     tree *loop_iv_base)
{
  tree op0, op1, bound, base;
  affine_iv iv0, iv1;
  enum tree_code code;
  tree step;

  code = gimple_cond_code (stmt);
  *loop_invariant = NULL;

  switch (code)
    {
    case GT_EXPR:
    case GE_EXPR:
    case NE_EXPR:
    case LT_EXPR:
    case LE_EXPR:
    case EQ_EXPR:
      break;

    default:
      return false;
    }

  op0 = gimple_cond_lhs (stmt);
  op1 = gimple_cond_rhs (stmt);

  if ((TREE_CODE (op0) != SSA_NAME && TREE_CODE (op0) != INTEGER_CST)
       || (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op1) != INTEGER_CST))
    return false;
  if (!simple_iv (loop, loop_containing_stmt (stmt), op0, &iv0, true))
    return false;
  if (!simple_iv (loop, loop_containing_stmt (stmt), op1, &iv1, true))
    return false;
  if (TREE_CODE (iv0.step) != INTEGER_CST
      || TREE_CODE (iv1.step) != INTEGER_CST)
    return false;
  if ((integer_zerop (iv0.step) && integer_zerop (iv1.step))
      || (!integer_zerop (iv0.step) && !integer_zerop (iv1.step)))
    return false;

  if (integer_zerop (iv0.step))
    {
      if (code != NE_EXPR && code != EQ_EXPR)
	code = invert_tree_comparison (code, false);
      bound = iv0.base;
      base = iv1.base;
      if (tree_fits_shwi_p (iv1.step))
	step = iv1.step;
      else
	return false;
    }
  else
    {
      bound = iv1.base;
      base = iv0.base;
      if (tree_fits_shwi_p (iv0.step))
	step = iv0.step;
      else
	return false;
    }

  if (TREE_CODE (bound) != INTEGER_CST)
    bound = get_base_value (bound);
  if (!bound)
    return false;
  if (TREE_CODE (base) != INTEGER_CST)
    base = get_base_value (base);
  if (!base)
    return false;

  *loop_invariant = bound;
  *compare_code = code;
  *loop_step = step;
  *loop_iv_base = base;
  return true;
}

/* Compare two SSA_NAMEs: returns TRUE if T1 and T2 are value coherent.  */

static bool
expr_coherent_p (tree t1, tree t2)
{
  gimple stmt;
  tree ssa_name_1 = NULL;
  tree ssa_name_2 = NULL;

  gcc_assert (TREE_CODE (t1) == SSA_NAME || TREE_CODE (t1) == INTEGER_CST);
  gcc_assert (TREE_CODE (t2) == SSA_NAME || TREE_CODE (t2) == INTEGER_CST);

  if (t1 == t2)
    return true;

  if (TREE_CODE (t1) == INTEGER_CST && TREE_CODE (t2) == INTEGER_CST)
    return true;
  if (TREE_CODE (t1) == INTEGER_CST || TREE_CODE (t2) == INTEGER_CST)
    return false;

  /* Check to see if t1 is expressed/defined with t2.  */
  stmt = SSA_NAME_DEF_STMT (t1);
  gcc_assert (stmt != NULL);
  if (is_gimple_assign (stmt))
    {
      ssa_name_1 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
      if (ssa_name_1 && ssa_name_1 == t2)
	return true;
    }

  /* Check to see if t2 is expressed/defined with t1.  */
  stmt = SSA_NAME_DEF_STMT (t2);
  gcc_assert (stmt != NULL);
  if (is_gimple_assign (stmt))
    {
      ssa_name_2 = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
      if (ssa_name_2 && ssa_name_2 == t1)
	return true;
    }

  /* Compare if t1 and t2's def_stmts are identical.  */
  if (ssa_name_2 != NULL && ssa_name_1 == ssa_name_2)
    return true;
  else
    return false;
}

/* Predict branch probability of BB when BB contains a branch that compares
   an induction variable in LOOP with LOOP_IV_BASE_VAR to LOOP_BOUND_VAR.  The
   loop exit is compared using LOOP_BOUND_CODE, with step of LOOP_BOUND_STEP.

   E.g.
     for (int i = 0; i < bound; i++) {
       if (i < bound - 2)
	 computation_1();
       else
	 computation_2();
     }

  In this loop, we will predict the branch inside the loop to be taken.  */

static void
predict_iv_comparison (struct loop *loop, basic_block bb,
		       tree loop_bound_var,
		       tree loop_iv_base_var,
		       enum tree_code loop_bound_code,
		       int loop_bound_step)
{
  gimple stmt;
  tree compare_var, compare_base;
  enum tree_code compare_code;
  tree compare_step_var;
  edge then_edge;
  edge_iterator ei;

  if (predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
      || predicted_by_p (bb, PRED_LOOP_ITERATIONS)
      || predicted_by_p (bb, PRED_LOOP_EXIT))
    return;

  stmt = last_stmt (bb);
  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
    return;
  if (!is_comparison_with_loop_invariant_p (stmt, loop, &compare_var,
					    &compare_code,
					    &compare_step_var,
					    &compare_base))
    return;

  /* Find the taken edge.  */
  FOR_EACH_EDGE (then_edge, ei, bb->succs)
    if (then_edge->flags & EDGE_TRUE_VALUE)
      break;

  /* When comparing an IV to a loop invariant, NE is more likely to be
     taken while EQ is more likely to be not-taken.  */
  if (compare_code == NE_EXPR)
    {
      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      return;
    }
  else if (compare_code == EQ_EXPR)
    {
      predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
      return;
    }

  if (!expr_coherent_p (loop_iv_base_var, compare_base))
    return;

  /* If loop bound, base and compare bound are all constants, we can
     calculate the probability directly.  */
  if (tree_fits_shwi_p (loop_bound_var)
      && tree_fits_shwi_p (compare_var)
      && tree_fits_shwi_p (compare_base))
    {
      int probability;
      bool of, overflow = false;
      double_int mod, compare_count, tem, loop_count;

      double_int loop_bound = tree_to_double_int (loop_bound_var);
      double_int compare_bound = tree_to_double_int (compare_var);
      double_int base = tree_to_double_int (compare_base);
      double_int compare_step = tree_to_double_int (compare_step_var);

      /* (loop_bound - base) / compare_step */
      tem = loop_bound.sub_with_overflow (base, &of);
      overflow |= of;
      loop_count = tem.divmod_with_overflow (compare_step,
					      0, TRUNC_DIV_EXPR,
					      &mod, &of);
      overflow |= of;

      if ((!compare_step.is_negative ())
          ^ (compare_code == LT_EXPR || compare_code == LE_EXPR))
	{
	  /* (loop_bound - compare_bound) / compare_step */
	  tem = loop_bound.sub_with_overflow (compare_bound, &of);
	  overflow |= of;
	  compare_count = tem.divmod_with_overflow (compare_step,
						     0, TRUNC_DIV_EXPR,
						     &mod, &of);
	  overflow |= of;
	}
      else
        {
	  /* (compare_bound - base) / compare_step */
	  tem = compare_bound.sub_with_overflow (base, &of);
	  overflow |= of;
          compare_count = tem.divmod_with_overflow (compare_step,
						     0, TRUNC_DIV_EXPR,
						     &mod, &of);
	  overflow |= of;
	}
      if (compare_code == LE_EXPR || compare_code == GE_EXPR)
	++compare_count;
      if (loop_bound_code == LE_EXPR || loop_bound_code == GE_EXPR)
	++loop_count;
      if (compare_count.is_negative ())
        compare_count = double_int_zero;
      if (loop_count.is_negative ())
        loop_count = double_int_zero;
      if (loop_count.is_zero ())
	probability = 0;
      else if (compare_count.scmp (loop_count) == 1)
	probability = REG_BR_PROB_BASE;
      else
        {
	  /* If loop_count is too big, such that REG_BR_PROB_BASE * loop_count
	     could overflow, shift both loop_count and compare_count right
	     a bit so that it doesn't overflow.  Note both counts are known not
	     to be negative at this point.  */
	  int clz_bits = clz_hwi (loop_count.high);
	  gcc_assert (REG_BR_PROB_BASE < 32768);
	  if (clz_bits < 16)
	    {
	      loop_count = loop_count.arshift (16 - clz_bits,
					       HOST_BITS_PER_DOUBLE_INT);
	      compare_count = compare_count.arshift (16 - clz_bits,
						     HOST_BITS_PER_DOUBLE_INT);
	    }
	  tem = compare_count.mul_with_sign (double_int::from_shwi
					    (REG_BR_PROB_BASE), true, &of);
	  gcc_assert (!of);
	  tem = tem.divmod (loop_count, true, TRUNC_DIV_EXPR, &mod);
	  probability = tem.to_uhwi ();
	}

      if (!overflow)
        predict_edge (then_edge, PRED_LOOP_IV_COMPARE, probability);

      return;
    }

  if (expr_coherent_p (loop_bound_var, compare_var))
    {
      if ((loop_bound_code == LT_EXPR || loop_bound_code == LE_EXPR)
	  && (compare_code == LT_EXPR || compare_code == LE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if ((loop_bound_code == GT_EXPR || loop_bound_code == GE_EXPR)
	       && (compare_code == GT_EXPR || compare_code == GE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if (loop_bound_code == NE_EXPR)
	{
	  /* If the loop backedge condition is "(i != bound)", we do
	     the comparison based on the step of IV:
	     * step < 0 : backedge condition is like (i > bound)
	     * step > 0 : backedge condition is like (i < bound)  */
	  gcc_assert (loop_bound_step != 0);
	  if (loop_bound_step > 0
	      && (compare_code == LT_EXPR
		  || compare_code == LE_EXPR))
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
	  else if (loop_bound_step < 0
		   && (compare_code == GT_EXPR
		       || compare_code == GE_EXPR))
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
	  else
	    predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
	}
      else
	/* The branch is predicted not-taken if loop_bound_code is
	   opposite to compare_code.  */
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
    }
  else if (expr_coherent_p (loop_iv_base_var, compare_var))
    {
      /* For cases like:
	   for (i = s; i < h; i++)
	     if (i > s + 2) ....
	 The branch should be predicted taken.  */
      if (loop_bound_step > 0
	  && (compare_code == GT_EXPR || compare_code == GE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else if (loop_bound_step < 0
	       && (compare_code == LT_EXPR || compare_code == LE_EXPR))
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, TAKEN);
      else
	predict_edge_def (then_edge, PRED_LOOP_IV_COMPARE_GUESS, NOT_TAKEN);
    }
}

/* Predict for extra loop exits that will lead to EXIT_EDGE.  Such extra
   loop exits result from short-circuit conditions that generate an
   if_tmp.  E.g.:

   if (foo() || global > 10)
     break;

   This will be translated into:

   BB3:
     loop header...
   BB4:
     if foo() goto BB6 else goto BB5
   BB5:
     if global > 10 goto BB6 else goto BB7
   BB6:
     goto BB7
   BB7:
     iftmp = (PHI 0(BB5), 1(BB6))
     if iftmp == 1 goto BB8 else goto BB3
   BB8:
     outside of the loop...

   The edge BB7->BB8 is a loop exit because BB8 is outside of the loop.
   From the dataflow, we can infer that BB4->BB6 and BB5->BB6 are also loop
   exits.  This function takes BB7->BB8 as input, and finds out the extra
   loop exits to predict them using PRED_LOOP_EXIT.  */

static void
predict_extra_loop_exits (edge exit_edge)
{
  unsigned i;
  bool check_value_one;
  gimple phi_stmt;
  tree cmp_rhs, cmp_lhs;
  gimple cmp_stmt = last_stmt (exit_edge->src);

  if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
    return;
  cmp_rhs = gimple_cond_rhs (cmp_stmt);
  cmp_lhs = gimple_cond_lhs (cmp_stmt);
  if (!TREE_CONSTANT (cmp_rhs)
      || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
    return;
  if (TREE_CODE (cmp_lhs) != SSA_NAME)
    return;

  /* If check_value_one is true, only the phi_args with value '1' will lead
     to loop exit.  Otherwise, only the phi_args with value '0' will lead to
     loop exit.  */
  check_value_one = (((integer_onep (cmp_rhs))
		    ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
		    ^ ((exit_edge->flags & EDGE_TRUE_VALUE) != 0));

  phi_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
  if (!phi_stmt || gimple_code (phi_stmt) != GIMPLE_PHI)
    return;

  for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
    {
      edge e1;
      edge_iterator ei;
      tree val = gimple_phi_arg_def (phi_stmt, i);
      edge e = gimple_phi_arg_edge (phi_stmt, i);

      if (!TREE_CONSTANT (val) || !(integer_zerop (val) || integer_onep (val)))
	continue;
      if ((check_value_one ^ integer_onep (val)) == 1)
	continue;
      if (EDGE_COUNT (e->src->succs) != 1)
	{
	  predict_paths_leading_to_edge (e, PRED_LOOP_EXIT, NOT_TAKEN);
	  continue;
	}

      FOR_EACH_EDGE (e1, ei, e->src->preds)
	predict_paths_leading_to_edge (e1, PRED_LOOP_EXIT, NOT_TAKEN);
    }
}

/* Predict edge probabilities by exploiting loop structure.  */

static void
predict_loops (void)
{
  struct loop *loop;

  /* Try to predict out blocks in a loop that are not part of a
     natural loop.  */
  FOR_EACH_LOOP (loop, 0)
    {
      basic_block bb, *bbs;
      unsigned j, n_exits;
      vec<edge> exits;
      struct tree_niter_desc niter_desc;
      edge ex;
      struct nb_iter_bound *nb_iter;
      enum tree_code loop_bound_code = ERROR_MARK;
      tree loop_bound_step = NULL;
      tree loop_bound_var = NULL;
      tree loop_iv_base = NULL;
      gimple stmt = NULL;

      exits = get_loop_exit_edges (loop);
      n_exits = exits.length ();
      if (!n_exits)
	{
          exits.release ();
	  continue;
	}

      FOR_EACH_VEC_ELT (exits, j, ex)
	{
	  tree niter = NULL;
	  HOST_WIDE_INT nitercst;
	  int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
	  int probability;
	  enum br_predictor predictor;

	  predict_extra_loop_exits (ex);

	  if (number_of_iterations_exit (loop, ex, &niter_desc, false, false))
	    niter = niter_desc.niter;
	  if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
	    niter = loop_niter_by_eval (loop, ex);

	  if (TREE_CODE (niter) == INTEGER_CST)
	    {
	      if (tree_fits_uhwi_p (niter)
		  && max
		  && compare_tree_int (niter, max - 1) == -1)
		nitercst = tree_to_uhwi (niter) + 1;
	      else
		nitercst = max;
	      predictor = PRED_LOOP_ITERATIONS;
	    }
	  /* If we have just one exit and we can derive some information about
	     the number of iterations of the loop from the statements inside
	     the loop, use it to predict this exit.  */
	  else if (n_exits == 1)
	    {
	      nitercst = estimated_stmt_executions_int (loop);
	      if (nitercst < 0)
		continue;
	      if (nitercst > max)
		nitercst = max;

	      predictor = PRED_LOOP_ITERATIONS_GUESSED;
	    }
	  else
	    continue;

	  /* If the prediction for number of iterations is zero, do not
	     predict the exit edges.  */
	  if (nitercst == 0)
	    continue;

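	  /* Each exit edge is predicted taken with probability of roughly
	     1/nitercst, rounded to nearest; e.g. with REG_BR_PROB_BASE at
	     its usual 10000 and a predicted iteration count of 100, the
	     exit gets (10000 + 50) / 100 == 100, i.e. about 1%.  */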
1589 	  probability = ((REG_BR_PROB_BASE + nitercst / 2) / nitercst);
1590 	  predict_edge (ex, predictor, probability);
1591 	}
1592       exits.release ();
1593 
1594       /* Find information about loop bound variables.  */
1595       for (nb_iter = loop->bounds; nb_iter;
1596 	   nb_iter = nb_iter->next)
1597 	if (nb_iter->stmt
1598 	    && gimple_code (nb_iter->stmt) == GIMPLE_COND)
1599 	  {
1600 	    stmt = nb_iter->stmt;
1601 	    break;
1602 	  }
1603       if (!stmt && last_stmt (loop->header)
1604 	  && gimple_code (last_stmt (loop->header)) == GIMPLE_COND)
1605 	stmt = last_stmt (loop->header);
1606       if (stmt)
1607 	is_comparison_with_loop_invariant_p (stmt, loop,
1608 					     &loop_bound_var,
1609 					     &loop_bound_code,
1610 					     &loop_bound_step,
1611 					     &loop_iv_base);
1612 
1613       bbs = get_loop_body (loop);
1614 
1615       for (j = 0; j < loop->num_nodes; j++)
1616 	{
1617 	  int header_found = 0;
1618 	  edge e;
1619 	  edge_iterator ei;
1620 
1621 	  bb = bbs[j];
1622 
1623 	  /* Bypass loop heuristics on continue statement.  These
1624 	     statements construct loops via "non-loop" constructs
1625 	     in the source language and are better to be handled
1626 	     separately.  */
1627 	  if (predicted_by_p (bb, PRED_CONTINUE))
1628 	    continue;
1629 
1630 	  /* Loop branch heuristics - predict an edge back to a
1631 	     loop's head as taken.  */
1632 	  if (bb == loop->latch)
1633 	    {
1634 	      e = find_edge (loop->latch, loop->header);
1635 	      if (e)
1636 		{
1637 		  header_found = 1;
1638 		  predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
1639 		}
1640 	    }
1641 
1642 	  /* Loop exit heuristics - predict an edge exiting the loop if the
1643 	     conditional has no loop header successors as not taken.  */
1644 	  if (!header_found
1645 	      /* If we already used more reliable loop exit predictors, do not
1646 		 bother with PRED_LOOP_EXIT.  */
1647 	      && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
1648 	      && !predicted_by_p (bb, PRED_LOOP_ITERATIONS))
1649 	    {
1650 	      /* For loop with many exits we don't want to predict all exits
1651 	         with the pretty large probability, because if all exits are
1652 		 considered in row, the loop would be predicted to iterate
1653 		 almost never.  The code to divide probability by number of
1654 		 exits is very rough.  It should compute the number of exits
1655 		 taken in each patch through function (not the overall number
1656 		 of exits that might be a lot higher for loops with wide switch
1657 		 statements in them) and compute n-th square root.
1658 
1659 		 We limit the minimal probability by 2% to avoid
1660 		 EDGE_PROBABILITY_RELIABLE from trusting the branch prediction
1661 		 as this was causing regression in perl benchmark containing such
1662 		 a wide loop.  */
1663 
	      int probability = ((REG_BR_PROB_BASE
				  - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
				 / n_exits);
	      if (probability < HITRATE (2))
		probability = HITRATE (2);
	      FOR_EACH_EDGE (e, ei, bb->succs)
		if (e->dest->index < NUM_FIXED_BLOCKS
		    || !flow_bb_inside_loop_p (loop, e->dest))
		  predict_edge (e, PRED_LOOP_EXIT, probability);
	    }
	  if (loop_bound_var)
	    predict_iv_comparison (loop, bb, loop_bound_var, loop_iv_base,
				   loop_bound_code,
				   tree_to_shwi (loop_bound_step));
	}

      /* Free basic blocks from get_loop_body.  */
      free (bbs);
    }
}

/* Attempt to predict probabilities of BB outgoing edges using local
   properties.  */
static void
bb_estimate_probability_locally (basic_block bb)
{
  rtx last_insn = BB_END (bb);
  rtx cond;

  if (! can_predict_insn_p (last_insn))
    return;
  cond = get_condition (last_insn, NULL, false, false);
  if (! cond)
    return;

  /* Try "pointer heuristic."
     A comparison ptr == 0 is predicted as false.
     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
  if (COMPARISON_P (cond)
      && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
	  || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
    {
      if (GET_CODE (cond) == EQ)
	predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
      else if (GET_CODE (cond) == NE)
	predict_insn_def (last_insn, PRED_POINTER, TAKEN);
    }
  else

  /* Try "opcode heuristic."
     EQ tests are usually false and NE tests are usually true. Also,
     most quantities are positive, so we can make the appropriate guesses
     about signed comparisons against zero.  */
    switch (GET_CODE (cond))
      {
      case CONST_INT:
	/* Unconditional branch.  */
	predict_insn_def (last_insn, PRED_UNCONDITIONAL,
			  cond == const0_rtx ? NOT_TAKEN : TAKEN);
	break;

      case EQ:
      case UNEQ:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of the special role of = tests in
	   FP code.  */
	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (XEXP (cond, 1) == const0_rtx
		 || XEXP (cond, 0) == const0_rtx)
	  ;
	else
	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
	break;

      case NE:
      case LTGT:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of the special role of = tests in
	   FP code.  */
	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (XEXP (cond, 1) == const0_rtx
		 || XEXP (cond, 0) == const0_rtx)
	  ;
	else
	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
	break;

      case ORDERED:
	predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
	break;

      case UNORDERED:
	predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
	break;

      case LE:
      case LT:
	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
	    || XEXP (cond, 1) == constm1_rtx)
	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
	break;

      case GE:
      case GT:
	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
	    || XEXP (cond, 1) == constm1_rtx)
	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
	break;

      default:
	break;
      }
}

/* Set edge->probability for each successor edge of BB.  */
void
guess_outgoing_edge_probabilities (basic_block bb)
{
  bb_estimate_probability_locally (bb);
  combine_predictions_for_insn (BB_END (bb), bb);
}

static tree expr_expected_value (tree, bitmap, enum br_predictor *predictor);

/* Helper function for expr_expected_value.  */

static tree
expr_expected_value_1 (tree type, tree op0, enum tree_code code,
		       tree op1, bitmap visited, enum br_predictor *predictor)
{
  gimple def;

  if (predictor)
    *predictor = PRED_UNCONDITIONAL;

  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
    {
      if (TREE_CONSTANT (op0))
	return op0;

      if (code != SSA_NAME)
	return NULL_TREE;

      def = SSA_NAME_DEF_STMT (op0);

      /* If we were already here, break the infinite cycle.  */
      if (!bitmap_set_bit (visited, SSA_NAME_VERSION (op0)))
	return NULL;

      if (gimple_code (def) == GIMPLE_PHI)
	{
	  /* All the arguments of the PHI node must have the same expected
	     constant value.  */
	  int i, n = gimple_phi_num_args (def);
	  tree val = NULL, new_val;

	  for (i = 0; i < n; i++)
	    {
	      tree arg = PHI_ARG_DEF (def, i);
	      enum br_predictor predictor2;

	      /* If this PHI has itself as an argument, we cannot
		 determine the expected value of this argument.  However,
		 if we can find an expected constant value for the other
		 PHI args then we can still be sure that this is
		 likely a constant.  So be optimistic and just
		 continue with the next argument.  */
	      if (arg == PHI_RESULT (def))
		continue;

	      new_val = expr_expected_value (arg, visited, &predictor2);

	      /* It is difficult to combine value predictors.  Simply assume
		 that the later predictor is weaker and take its prediction.  */
	      if (predictor && *predictor < predictor2)
		*predictor = predictor2;
	      if (!new_val)
		return NULL;
	      if (!val)
		val = new_val;
	      else if (!operand_equal_p (val, new_val, false))
		return NULL;
	    }
	  return val;
	}
      if (is_gimple_assign (def))
	{
	  if (gimple_assign_lhs (def) != op0)
	    return NULL;

	  return expr_expected_value_1 (TREE_TYPE (gimple_assign_lhs (def)),
					gimple_assign_rhs1 (def),
					gimple_assign_rhs_code (def),
					gimple_assign_rhs2 (def),
					visited, predictor);
	}

      if (is_gimple_call (def))
	{
	  tree decl = gimple_call_fndecl (def);
	  if (!decl)
	    {
	      if (gimple_call_internal_p (def)
		  && gimple_call_internal_fn (def) == IFN_BUILTIN_EXPECT)
		{
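		  /* IFN_BUILTIN_EXPECT carries the annotated expression,
		     its expected value and the predictor to use as
		     arguments 0, 1 and 2, respectively.  */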
		  gcc_assert (gimple_call_num_args (def) == 3);
		  tree val = gimple_call_arg (def, 0);
		  if (TREE_CONSTANT (val))
		    return val;
		  if (predictor)
		    {
		      *predictor = PRED_BUILTIN_EXPECT;
		      tree val2 = gimple_call_arg (def, 2);
		      gcc_assert (TREE_CODE (val2) == INTEGER_CST
				  && tree_fits_uhwi_p (val2)
				  && tree_to_uhwi (val2) < END_PREDICTORS);
		      *predictor = (enum br_predictor) tree_to_uhwi (val2);
		    }
		  return gimple_call_arg (def, 1);
		}
	      return NULL;
	    }
	  if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
	    switch (DECL_FUNCTION_CODE (decl))
	      {
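	      /* __builtin_expect (EXPR, EXPECTED) evaluates to EXPR with
		 the expected value EXPECTED.  */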
	      case BUILT_IN_EXPECT:
		{
		  tree val;
		  if (gimple_call_num_args (def) != 2)
		    return NULL;
		  val = gimple_call_arg (def, 0);
		  if (TREE_CONSTANT (val))
		    return val;
		  if (predictor)
		    *predictor = PRED_BUILTIN_EXPECT;
		  return gimple_call_arg (def, 1);
		}

	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_1:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_2:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_4:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_8:
	      case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_16:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
	      case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
		/* Assume that any given atomic operation has low contention,
		   and thus the compare-and-swap operation succeeds.  */
		if (predictor)
		  *predictor = PRED_COMPARE_AND_SWAP;
		return boolean_true_node;
	    }
	}

      return NULL;
    }

  if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
    {
      tree res;
      enum br_predictor predictor2;
      op0 = expr_expected_value (op0, visited, predictor);
      if (!op0)
	return NULL;
      op1 = expr_expected_value (op1, visited, &predictor2);
      if (predictor && *predictor < predictor2)
	*predictor = predictor2;
      if (!op1)
	return NULL;
      res = fold_build2 (code, type, op0, op1);
      if (TREE_CONSTANT (res))
	return res;
      return NULL;
    }
  if (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS)
    {
      tree res;
      op0 = expr_expected_value (op0, visited, predictor);
      if (!op0)
	return NULL;
      res = fold_build1 (code, type, op0);
      if (TREE_CONSTANT (res))
	return res;
      return NULL;
    }
  return NULL;
}

/* Return the constant EXPR is likely to have at execution time, or NULL
   if unknown.  The function is used by the builtin_expect branch predictor,
   so the evidence must come from that construct plus whatever constant
   folding is possible on top of it.

   We may want to implement a more involved value guess (such as value
   range propagation based prediction), but such tricks belong in a new
   implementation.  */

static tree
expr_expected_value (tree expr, bitmap visited,
		     enum br_predictor *predictor)
{
  enum tree_code code;
  tree op0, op1;

  if (TREE_CONSTANT (expr))
    {
      if (predictor)
	*predictor = PRED_UNCONDITIONAL;
      return expr;
    }

  extract_ops_from_tree (expr, &code, &op0, &op1);
  return expr_expected_value_1 (TREE_TYPE (expr),
				op0, code, op1, visited, predictor);
}


/* Get rid of all builtin_expect calls and GIMPLE_PREDICT statements
   we no longer need.  */
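/* For instance, a statement

     tmp = __builtin_expect (cond, 1);

   is replaced by the plain copy

     tmp = cond;

   since the hint has already been consumed by branch prediction.  */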
static unsigned int
strip_predict_hints (void)
{
  basic_block bb;
  gimple ass_stmt;
  tree var;

  FOR_EACH_BB_FN (bb, cfun)
    {
      gimple_stmt_iterator bi;
      for (bi = gsi_start_bb (bb); !gsi_end_p (bi);)
	{
	  gimple stmt = gsi_stmt (bi);

	  if (gimple_code (stmt) == GIMPLE_PREDICT)
	    {
	      gsi_remove (&bi, true);
	      continue;
	    }
	  else if (is_gimple_call (stmt))
	    {
	      tree fndecl = gimple_call_fndecl (stmt);

	      if ((fndecl
		   && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
		   && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
		   && gimple_call_num_args (stmt) == 2)
		  || (gimple_call_internal_p (stmt)
		      && gimple_call_internal_fn (stmt) == IFN_BUILTIN_EXPECT))
		{
		  var = gimple_call_lhs (stmt);
		  if (var)
		    {
		      ass_stmt
			= gimple_build_assign (var, gimple_call_arg (stmt, 0));
		      gsi_replace (&bi, ass_stmt, true);
		    }
		  else
		    {
		      gsi_remove (&bi, true);
		      continue;
		    }
		}
	    }
	  gsi_next (&bi);
	}
    }
  return 0;
}

/* Predict using the opcode of the last statement in the basic block.  */
static void
tree_predict_by_opcode (basic_block bb)
{
  gimple stmt = last_stmt (bb);
  edge then_edge;
  tree op0, op1;
  tree type;
  tree val;
  enum tree_code cmp;
  bitmap visited;
  edge_iterator ei;
  enum br_predictor predictor;

  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
    return;
  FOR_EACH_EDGE (then_edge, ei, bb->succs)
    if (then_edge->flags & EDGE_TRUE_VALUE)
      break;
  op0 = gimple_cond_lhs (stmt);
  op1 = gimple_cond_rhs (stmt);
  cmp = gimple_cond_code (stmt);
  type = TREE_TYPE (op0);
  visited = BITMAP_ALLOC (NULL);
  val = expr_expected_value_1 (boolean_type_node, op0, cmp, op1, visited,
			       &predictor);
  BITMAP_FREE (visited);
  if (val && TREE_CODE (val) == INTEGER_CST)
    {
      if (predictor == PRED_BUILTIN_EXPECT)
	{
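	  /* The hitrate of __builtin_expect is controlled by
	     --param builtin-expect-probability; its default value
	     is 90.  */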
	  int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);

	  gcc_assert (percent >= 0 && percent <= 100);
	  if (integer_zerop (val))
	    percent = 100 - percent;
	  predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
	}
      else
	predict_edge (then_edge, predictor,
		      integer_zerop (val) ? NOT_TAKEN : TAKEN);
    }
  /* Try "pointer heuristic."
     A comparison ptr == 0 is predicted as false.
     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
  if (POINTER_TYPE_P (type))
    {
      if (cmp == EQ_EXPR)
	predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
      else if (cmp == NE_EXPR)
	predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
    }
  else

  /* Try "opcode heuristic."
     EQ tests are usually false and NE tests are usually true. Also,
     most quantities are positive, so we can make the appropriate guesses
     about signed comparisons against zero.  */
    switch (cmp)
      {
      case EQ_EXPR:
      case UNEQ_EXPR:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of the special role of = tests in
	   FP code.  */
	if (FLOAT_TYPE_P (type))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (integer_zerop (op0) || integer_zerop (op1))
	  ;
	else
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
	break;

      case NE_EXPR:
      case LTGT_EXPR:
	/* Floating point comparisons appear to behave in a very
	   unpredictable way because of the special role of = tests in
	   FP code.  */
	if (FLOAT_TYPE_P (type))
	  ;
	/* Comparisons with 0 are often used for booleans and there is
	   nothing useful to predict about them.  */
	else if (integer_zerop (op0)
		 || integer_zerop (op1))
	  ;
	else
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
	break;

      case ORDERED_EXPR:
	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
	break;

      case UNORDERED_EXPR:
	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
	break;

      case LE_EXPR:
      case LT_EXPR:
	if (integer_zerop (op1)
	    || integer_onep (op1)
	    || integer_all_onesp (op1)
	    || real_zerop (op1)
	    || real_onep (op1)
	    || real_minus_onep (op1))
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
	break;

      case GE_EXPR:
      case GT_EXPR:
	if (integer_zerop (op1)
	    || integer_onep (op1)
	    || integer_all_onesp (op1)
	    || real_zerop (op1)
	    || real_onep (op1)
	    || real_minus_onep (op1))
	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
	break;

      default:
	break;
      }
}

/* Try to guess whether the value of a return statement means an error
   code.  */

static enum br_predictor
return_prediction (tree val, enum prediction *prediction)
{
  /* VOID.  */
  if (!val)
    return PRED_NO_PREDICTION;
  /* Different heuristics for pointers and scalars.  */
  if (POINTER_TYPE_P (TREE_TYPE (val)))
    {
      /* NULL is usually not returned.  */
      if (integer_zerop (val))
	{
	  *prediction = NOT_TAKEN;
	  return PRED_NULL_RETURN;
	}
    }
  else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
    {
      /* Negative return values are often used to indicate
         errors.  */
      if (TREE_CODE (val) == INTEGER_CST
	  && tree_int_cst_sgn (val) < 0)
	{
	  *prediction = NOT_TAKEN;
	  return PRED_NEGATIVE_RETURN;
	}
      /* Constant return values seem to be commonly taken.
         Zero/one often represent booleans, so exclude them from the
	 heuristics.  */
      if (TREE_CONSTANT (val)
	  && (!integer_zerop (val) && !integer_onep (val)))
	{
	  *prediction = TAKEN;
	  return PRED_CONST_RETURN;
	}
    }
  return PRED_NO_PREDICTION;
}

/* Find the basic block with the return expression and look up its possible
   return value, trying to apply the RETURN_PREDICTION heuristics.  */
static void
apply_return_prediction (void)
{
  gimple return_stmt = NULL;
  tree return_val;
  edge e;
  gimple phi;
  int phi_num_args, i;
  enum br_predictor pred;
  enum prediction direction;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      return_stmt = last_stmt (e->src);
      if (return_stmt
	  && gimple_code (return_stmt) == GIMPLE_RETURN)
	break;
    }
  if (!e)
    return;
  return_val = gimple_return_retval (return_stmt);
  if (!return_val)
    return;
  if (TREE_CODE (return_val) != SSA_NAME
      || !SSA_NAME_DEF_STMT (return_val)
      || gimple_code (SSA_NAME_DEF_STMT (return_val)) != GIMPLE_PHI)
    return;
  phi = SSA_NAME_DEF_STMT (return_val);
  phi_num_args = gimple_phi_num_args (phi);
  pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);

  /* Avoid the degenerate case where all return values from the function
     belong to the same category (i.e. they are all positive constants),
     so that we can hardly say anything about them.  */
  for (i = 1; i < phi_num_args; i++)
    if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
      break;
  if (i != phi_num_args)
    for (i = 0; i < phi_num_args; i++)
      {
	pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
	if (pred != PRED_NO_PREDICTION)
	  predict_paths_leading_to_edge (gimple_phi_arg_edge (phi, i), pred,
					 direction);
      }
}

/* Look for basic blocks that contain unlikely-to-happen events
   (such as noreturn calls) and mark all paths leading to the execution
   of these basic blocks as unlikely.  */

static void
tree_bb_level_predictions (void)
{
  basic_block bb;
  bool has_return_edges = false;
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    if (!(e->flags & (EDGE_ABNORMAL | EDGE_FAKE | EDGE_EH)))
      {
	has_return_edges = true;
	break;
      }

  apply_return_prediction ();

  FOR_EACH_BB_FN (bb, cfun)
    {
      gimple_stmt_iterator gsi;

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  tree decl;

	  if (is_gimple_call (stmt))
	    {
	      if ((gimple_call_flags (stmt) & ECF_NORETURN)
		  && has_return_edges)
		predict_paths_leading_to (bb, PRED_NORETURN,
					  NOT_TAKEN);
	      decl = gimple_call_fndecl (stmt);
	      if (decl
		  && lookup_attribute ("cold",
				       DECL_ATTRIBUTES (decl)))
		predict_paths_leading_to (bb, PRED_COLD_FUNCTION,
					  NOT_TAKEN);
	    }
	  else if (gimple_code (stmt) == GIMPLE_PREDICT)
	    {
	      predict_paths_leading_to (bb, gimple_predict_predictor (stmt),
					gimple_predict_outcome (stmt));
	      /* Keep GIMPLE_PREDICT around so early inlining will propagate
	         hints to callers.  */
	    }
	}
    }
}

#ifdef ENABLE_CHECKING

/* Callback for pointer_map_traverse, asserts that the pointer map is
   empty.  */

static bool
assert_is_empty (const void *key ATTRIBUTE_UNUSED, void **value,
		 void *data ATTRIBUTE_UNUSED)
{
  gcc_assert (!*value);
  return false;
}
#endif

/* Predict branch probabilities and estimate profile for basic block BB.  */

static void
tree_estimate_probability_bb (basic_block bb)
{
  edge e;
  edge_iterator ei;
  gimple last;

  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      /* Predict edges to user labels with attributes.  */
      if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
	{
	  gimple_stmt_iterator gi;
	  for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
	    {
	      gimple stmt = gsi_stmt (gi);
	      tree decl;

	      if (gimple_code (stmt) != GIMPLE_LABEL)
		break;
	      decl = gimple_label_label (stmt);
	      if (DECL_ARTIFICIAL (decl))
		continue;

	      /* Finally, we have a user-defined label.  */
	      if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
		predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
	      else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
		predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
	    }
	}

      /* Predict early returns to be probable, as we've already taken
	 care of error returns and other cases are often used for
	 fast paths through the function.

	 Since we've already removed the return statements, we are
	 looking for a CFG like:

	 if (conditional)
	 {
	 ..
	 goto return_block
	 }
	 some other blocks
	 return_block:
	 return_stmt.  */
      if (e->dest != bb->next_bb
	  && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
	  && single_succ_p (e->dest)
	  && single_succ_edge (e->dest)->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
	  && (last = last_stmt (e->dest)) != NULL
	  && gimple_code (last) == GIMPLE_RETURN)
	{
	  edge e1;
	  edge_iterator ei1;

	  if (single_succ_p (bb))
	    {
	      FOR_EACH_EDGE (e1, ei1, bb->preds)
		if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
		    && !predicted_by_p (e1->src, PRED_CONST_RETURN)
		    && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN))
		  predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
	    }
	  else
	    if (!predicted_by_p (e->src, PRED_NULL_RETURN)
		&& !predicted_by_p (e->src, PRED_CONST_RETURN)
		&& !predicted_by_p (e->src, PRED_NEGATIVE_RETURN))
	      predict_edge_def (e, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
	}

      /* Look for a block we are guarding (i.e. we dominate it,
	 but it doesn't postdominate us).  */
      if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) && e->dest != bb
	  && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
	  && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
	{
	  gimple_stmt_iterator bi;

	  /* The call heuristic claims that a guarded function call
	     is improbable.  This is because such calls are often used
	     to signal exceptional situations such as printing error
	     messages.  */
	  for (bi = gsi_start_bb (e->dest); !gsi_end_p (bi);
	       gsi_next (&bi))
	    {
	      gimple stmt = gsi_stmt (bi);
	      if (is_gimple_call (stmt)
		  /* Constant and pure calls are rarely used to signal
		     something exceptional.  */
		  && gimple_has_side_effects (stmt))
		{
		  predict_edge_def (e, PRED_CALL, NOT_TAKEN);
		  break;
		}
	    }
	}
    }
  tree_predict_by_opcode (bb);
}

/* Predict branch probabilities and estimate profile of the tree CFG.
   This function can be called from the loop optimizers to recompute
   the profile information.  */

void
tree_estimate_probability (void)
{
  basic_block bb;

  add_noreturn_fake_exit_edges ();
  connect_infinite_loops_to_exit ();
  /* We use loop_niter_by_eval, which requires that the loops have
     preheaders.  */
  create_preheaders (CP_SIMPLE_PREHEADERS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

  bb_predictions = pointer_map_create ();
  tree_bb_level_predictions ();
  record_loop_exits ();

  if (number_of_loops (cfun) > 1)
    predict_loops ();

  FOR_EACH_BB_FN (bb, cfun)
    tree_estimate_probability_bb (bb);

  FOR_EACH_BB_FN (bb, cfun)
    combine_predictions_for_bb (bb);

#ifdef ENABLE_CHECKING
  pointer_map_traverse (bb_predictions, assert_is_empty, NULL);
#endif
  pointer_map_destroy (bb_predictions);
  bb_predictions = NULL;

  estimate_bb_frequencies (false);
  free_dominance_info (CDI_POST_DOMINATORS);
  remove_fake_exit_edges ();
}

/* Predict branch probabilities and estimate profile of the tree CFG.
   This is the driver function for PASS_PROFILE.  */

static unsigned int
tree_estimate_probability_driver (void)
{
  unsigned nb_loops;

  loop_optimizer_init (LOOPS_NORMAL);
  if (dump_file && (dump_flags & TDF_DETAILS))
    flow_loops_dump (dump_file, NULL, 0);

  mark_irreducible_loops ();

  nb_loops = number_of_loops (cfun);
  if (nb_loops > 1)
    scev_initialize ();

  tree_estimate_probability ();

  if (nb_loops > 1)
    scev_finalize ();

  loop_optimizer_finalize ();
  if (dump_file && (dump_flags & TDF_DETAILS))
    gimple_dump_cfg (dump_file, dump_flags);
  if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
    profile_status_for_fn (cfun) = PROFILE_GUESSED;
  return 0;
}

/* Predict edges to successors of CUR whose sources are not postdominated by
   BB by PRED and recurse to all postdominators.  */

static void
predict_paths_for_bb (basic_block cur, basic_block bb,
		      enum br_predictor pred,
		      enum prediction taken,
		      bitmap visited)
{
  edge e;
  edge_iterator ei;
  basic_block son;

  /* We are looking for all edges forming an edge cut induced by the
     set of all blocks postdominated by BB.  */
  FOR_EACH_EDGE (e, ei, cur->preds)
    if (e->src->index >= NUM_FIXED_BLOCKS
	&& !dominated_by_p (CDI_POST_DOMINATORS, e->src, bb))
    {
      edge e2;
      edge_iterator ei2;
      bool found = false;

      /* Ignore fake and EH edges; we predict them as not taken anyway.  */
      if (e->flags & (EDGE_EH | EDGE_FAKE))
	continue;
      gcc_assert (bb == cur || dominated_by_p (CDI_POST_DOMINATORS, cur, bb));

      /* See if there is an edge from e->src that is not abnormal
	 and does not lead to BB.  */
      FOR_EACH_EDGE (e2, ei2, e->src->succs)
	if (e2 != e
	    && !(e2->flags & (EDGE_EH | EDGE_FAKE))
	    && !dominated_by_p (CDI_POST_DOMINATORS, e2->dest, bb))
	  {
	    found = true;
	    break;
	  }

      /* If there is a non-abnormal path leaving e->src, predict the edge
	 using the predictor.  Otherwise we need to look for paths
	 leading to e->src.

	 The latter may lead to an infinite loop in the case we are
	 predicting regions that are only reachable by abnormal edges.
	 We simply prevent visiting a given BB twice.  */
      if (found)
	predict_edge_def (e, pred, taken);
      else if (bitmap_set_bit (visited, e->src->index))
	predict_paths_for_bb (e->src, e->src, pred, taken, visited);
    }
  for (son = first_dom_son (CDI_POST_DOMINATORS, cur);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    predict_paths_for_bb (son, bb, pred, taken, visited);
}

/* Set branch probabilities on all paths leading to BB according to
   the predictor PRED and prediction TAKEN.  */

static void
predict_paths_leading_to (basic_block bb, enum br_predictor pred,
			  enum prediction taken)
{
  bitmap visited = BITMAP_ALLOC (NULL);
  predict_paths_for_bb (bb, bb, pred, taken, visited);
  BITMAP_FREE (visited);
}

/* Like predict_paths_leading_to, but take an edge instead of a basic
   block.  */

static void
predict_paths_leading_to_edge (edge e, enum br_predictor pred,
			       enum prediction taken)
{
  bool has_nonloop_edge = false;
  edge_iterator ei;
  edge e2;

  basic_block bb = e->src;
  FOR_EACH_EDGE (e2, ei, bb->succs)
    if (e2->dest != e->src && e2->dest != e->dest
	&& !(e->flags & (EDGE_EH | EDGE_FAKE))
	&& !dominated_by_p (CDI_POST_DOMINATORS, e->src, e2->dest))
      {
	has_nonloop_edge = true;
	break;
      }
  if (!has_nonloop_edge)
    {
      bitmap visited = BITMAP_ALLOC (NULL);
      predict_paths_for_bb (bb, bb, pred, taken, visited);
      BITMAP_FREE (visited);
    }
  else
    predict_edge_def (e, pred, taken);
}

/* This is used to carry information about basic blocks.  It is
   attached to the AUX field of the standard CFG block.  */

typedef struct block_info_def
{
  /* Estimated frequency of execution of basic_block.  */
  sreal frequency;

  /* To keep queue of basic blocks to process.  */
  basic_block next;

  /* Number of predecessors we need to visit first.  */
  int npredecessors;
} *block_info;

/* Similar information for edges.  */
typedef struct edge_info_def
{
  /* In case the edge is a loop-back edge, the probability that the edge
     is taken provided that the loop header is reached.  The estimated
     number of iterations of the loop can then be computed as
     1 / (1 - back_edge_prob).  */
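  /* E.g. a back_edge_prob of 0.75 yields an iteration estimate of
     1 / (1 - 0.75) = 4.  */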
  sreal back_edge_prob;
  /* True if the edge is a loopback edge in the natural loop.  */
  unsigned int back_edge:1;
} *edge_info;

#define BLOCK_INFO(B)	((block_info) (B)->aux)
#define EDGE_INFO(E)	((edge_info) (E)->aux)

/* Helper function for estimate_bb_frequencies.
   Propagate the frequencies in blocks marked in
   TOVISIT, starting in HEAD.  */

static void
propagate_freq (basic_block head, bitmap tovisit)
{
  basic_block bb;
  basic_block last;
  unsigned i;
  edge e;
  basic_block nextbb;
  bitmap_iterator bi;

  /* For each basic block we need to visit, count the number of its
     predecessors that must be visited first.  */
  EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
    {
      edge_iterator ei;
      int count = 0;

      bb = BASIC_BLOCK_FOR_FN (cfun, i);

      FOR_EACH_EDGE (e, ei, bb->preds)
	{
	  bool visit = bitmap_bit_p (tovisit, e->src->index);

	  if (visit && !(e->flags & EDGE_DFS_BACK))
	    count++;
	  else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
	    fprintf (dump_file,
		     "Irreducible region hit, ignoring edge to %i->%i\n",
		     e->src->index, bb->index);
	}
      BLOCK_INFO (bb)->npredecessors = count;
      /* When the function never returns, we will never process the exit
	 block.  */
      if (!count && bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
	bb->count = bb->frequency = 0;
    }

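  /* Process the blocks in topological order ignoring back edges: start
     from HEAD and append a block to the queue (chained through
     BLOCK_INFO->next) once all of its non-back-edge predecessors have
     been visited.  */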
  memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
  last = head;
  for (bb = head; bb; bb = nextbb)
    {
      edge_iterator ei;
      sreal cyclic_probability, frequency;

      memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
      memcpy (&frequency, &real_zero, sizeof (real_zero));

      nextbb = BLOCK_INFO (bb)->next;
      BLOCK_INFO (bb)->next = NULL;

      /* Compute frequency of basic block.  */
      if (bb != head)
	{
#ifdef ENABLE_CHECKING
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
			|| (e->flags & EDGE_DFS_BACK));
#endif

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_INFO (e)->back_edge)
	      {
		sreal_add (&cyclic_probability, &cyclic_probability,
			   &EDGE_INFO (e)->back_edge_prob);
	      }
	    else if (!(e->flags & EDGE_DFS_BACK))
	      {
		sreal tmp;

		/*  frequency += (e->probability
				  * BLOCK_INFO (e->src)->frequency /
				  REG_BR_PROB_BASE);  */

		sreal_init (&tmp, e->probability, 0);
		sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
		sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
		sreal_add (&frequency, &frequency, &tmp);
	      }

	  if (sreal_compare (&cyclic_probability, &real_zero) == 0)
	    {
	      memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
		      sizeof (frequency));
	    }
	  else
	    {
	      if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
		{
		  memcpy (&cyclic_probability, &real_almost_one,
			  sizeof (real_almost_one));
		}

	      /* BLOCK_INFO (bb)->frequency = frequency
					      / (1 - cyclic_probability) */

	      sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
	      sreal_div (&BLOCK_INFO (bb)->frequency,
			 &frequency, &cyclic_probability);
	    }
	}

      bitmap_clear_bit (tovisit, bb->index);

      e = find_edge (bb, head);
      if (e)
	{
	  sreal tmp;

	  /* EDGE_INFO (e)->back_edge_prob
	     = ((e->probability * BLOCK_INFO (bb)->frequency)
	     / REG_BR_PROB_BASE); */

	  sreal_init (&tmp, e->probability, 0);
	  sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
	  sreal_mul (&EDGE_INFO (e)->back_edge_prob,
		     &tmp, &real_inv_br_prob_base);
	}

      /* Propagate to successor blocks.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (!(e->flags & EDGE_DFS_BACK)
	    && BLOCK_INFO (e->dest)->npredecessors)
	  {
	    BLOCK_INFO (e->dest)->npredecessors--;
	    if (!BLOCK_INFO (e->dest)->npredecessors)
	      {
		if (!nextbb)
		  nextbb = e->dest;
		else
		  BLOCK_INFO (last)->next = e->dest;

		last = e->dest;
	      }
	  }
    }
}

/* Estimate frequencies in loops at the same nest level.  */

static void
estimate_loops_at_level (struct loop *first_loop)
{
  struct loop *loop;

  for (loop = first_loop; loop; loop = loop->next)
    {
      edge e;
      basic_block *bbs;
      unsigned i;
      bitmap tovisit = BITMAP_ALLOC (NULL);

      estimate_loops_at_level (loop->inner);

      /* Find the current loop's back edge and mark it.  */
      e = loop_latch_edge (loop);
      EDGE_INFO (e)->back_edge = 1;

      bbs = get_loop_body (loop);
      for (i = 0; i < loop->num_nodes; i++)
	bitmap_set_bit (tovisit, bbs[i]->index);
      free (bbs);
      propagate_freq (loop->header, tovisit);
      BITMAP_FREE (tovisit);
    }
}

/* Propagate frequencies through the structure of loops.  */

static void
estimate_loops (void)
{
  bitmap tovisit = BITMAP_ALLOC (NULL);
  basic_block bb;

  /* Start by estimating the frequencies in the loops.  */
  if (number_of_loops (cfun) > 1)
    estimate_loops_at_level (current_loops->tree_root->inner);

  /* Now propagate the frequencies through all the blocks.  */
  FOR_ALL_BB_FN (bb, cfun)
    {
      bitmap_set_bit (tovisit, bb->index);
    }
  propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit);
  BITMAP_FREE (tovisit);
}

/* Drop the profile for NODE to guessed, and update its frequency based on
   whether it is expected to be hot given the CALL_COUNT.  */

static void
drop_profile (struct cgraph_node *node, gcov_type call_count)
{
  struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
  /* In the case where this was called by another function with a
     dropped profile, call_count will be 0.  Since there are no
     non-zero call counts to this function, we don't know for sure
     whether it is hot, and therefore it will be marked normal below.  */
  bool hot = maybe_hot_count_p (NULL, call_count);

  if (dump_file)
    fprintf (dump_file,
             "Dropping 0 profile for %s/%i. %s based on calls.\n",
             node->name (), node->order,
             hot ? "Function is hot" : "Function is normal");
  /* We only expect to miss profiles for functions that are reached
     via non-zero call edges in cases where the function may have
     been linked from another module or library (COMDATs and extern
     templates).  See the comments below for handle_missing_profiles.
     Also, only warn in cases where the missing counts exceed the
     number of training runs.  In certain cases with an execv followed
     by a no-return call the profile for the no-return call is not
     dumped and there can be a mismatch.  */
  if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl)
      && call_count > profile_info->runs)
    {
      if (flag_profile_correction)
        {
          if (dump_file)
            fprintf (dump_file,
                     "Missing counts for called function %s/%i\n",
                     node->name (), node->order);
        }
      else
        warning (0, "Missing counts for called function %s/%i",
                 node->name (), node->order);
    }

  profile_status_for_fn (fn)
      = (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
  node->frequency
      = hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
}

/* In the case of COMDAT routines, multiple object files will contain the same
   function and the linker will select one for the binary.  In that case
   all the other copies from the profile-instrumented binary will be missing
   profile counts.  Look for cases where this happened, due to non-zero
   call counts going to 0-count functions, and drop the profile to guessed
   so that we can use the estimated probabilities and avoid optimizing only
   for size.

   The other case where the profile may be missing is when the routine
   is not going to be emitted to the object file, e.g. for "extern template"
   class methods.  Those will be marked DECL_EXTERNAL.  Emit a warning in
   all other cases of non-zero calls to 0-count functions.  */

void
handle_missing_profiles (void)
{
  struct cgraph_node *node;
  int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
  vec<struct cgraph_node *> worklist;
  worklist.create (64);

  /* See if a 0-count function has non-0 count callers.  In that case we
     lost some profile.  Drop its function profile to PROFILE_GUESSED.  */
  FOR_EACH_DEFINED_FUNCTION (node)
    {
      struct cgraph_edge *e;
      gcov_type call_count = 0;
      gcov_type max_tp_first_run = 0;
      struct function *fn = DECL_STRUCT_FUNCTION (node->decl);

      if (node->count)
        continue;
      for (e = node->callers; e; e = e->next_caller)
      {
        call_count += e->count;

	if (e->caller->tp_first_run > max_tp_first_run)
	  max_tp_first_run = e->caller->tp_first_run;
      }

      /* If the time profile is missing, let's assign the maximum that
	 comes from the caller functions.  */
      if (!node->tp_first_run && max_tp_first_run)
	node->tp_first_run = max_tp_first_run + 1;

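      /* The profile is considered lost when the function received at
	 least one call per UNLIKELY_BB_COUNT_FRACTION training runs
	 while its own count stayed zero.  */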
      if (call_count
          && fn && fn->cfg
          && (call_count * unlikely_count_fraction >= profile_info->runs))
        {
          drop_profile (node, call_count);
          worklist.safe_push (node);
        }
    }

  /* Propagate the profile dropping to other 0-count COMDATs that are
     potentially called by COMDATs we already dropped the profile on.  */
  while (worklist.length () > 0)
    {
      struct cgraph_edge *e;

      node = worklist.pop ();
      for (e = node->callees; e; e = e->next_callee)
        {
          struct cgraph_node *callee = e->callee;
          struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);

          if (callee->count > 0)
            continue;
          if (DECL_COMDAT (callee->decl) && fn && fn->cfg
              && profile_status_for_fn (fn) == PROFILE_READ)
            {
              drop_profile (node, 0);
              worklist.safe_push (callee);
            }
        }
    }
  worklist.release ();
}

/* Convert counts measured by profile driven feedback to frequencies.
   Return nonzero iff there was any nonzero execution count.  */

int
counts_to_freqs (void)
{
  gcov_type count_max, true_count_max = 0;
  basic_block bb;

  /* Don't overwrite the estimated frequencies when the profile for
     the function is missing.  We may drop this function to PROFILE_GUESSED
     later in drop_profile ().  */
  if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
    return 0;

  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
    true_count_max = MAX (bb->count, true_count_max);

  count_max = MAX (true_count_max, 1);
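  /* Scale the counts linearly into the range [0, BB_FREQ_MAX],
     rounding to the nearest integer.  */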
  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
    bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;

  return true_count_max;
}

/* Return true if the function is likely to be expensive, so there is no
   point in optimizing the performance of the prologue and epilogue, or in
   doing inlining, at the expense of code size growth.  THRESHOLD is the
   limit on the average number of instructions the function may execute
   while still being considered inexpensive.  */

bool
expensive_function_p (int threshold)
{
  unsigned int sum = 0;
  basic_block bb;
  unsigned int limit;

  /* We cannot compute this accurately for large thresholds due to scaled
     frequencies.  */
  gcc_assert (threshold <= BB_FREQ_MAX);

  /* Frequencies are out of range.  This either means that the function
     contains an internal loop executing more than BB_FREQ_MAX times or
     profile feedback is available and the function has not been executed
     at all.  */
  if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency == 0)
    return true;

  /* Maximally BB_FREQ_MAX^2 so overflow won't happen.  */
  limit = ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency * threshold;
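  /* SUM adds up BB->frequency once per active insn, so it estimates the
     number of instructions executed per invocation, scaled by the entry
     block frequency; the function is expensive once SUM exceeds LIMIT.  */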
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx insn;

      FOR_BB_INSNS (bb, insn)
	if (active_insn_p (insn))
	  {
	    sum += bb->frequency;
	    if (sum > limit)
	      return true;
	  }
    }

  return false;
}

/* Estimate and propagate basic block frequencies using the given branch
   probabilities.  If FORCE is true, the frequencies are used to estimate
   the counts even when there are already non-zero profile counts.  */

void
estimate_bb_frequencies (bool force)
{
  basic_block bb;
  sreal freq_max;

  if (force || profile_status_for_fn (cfun) != PROFILE_READ
      || !counts_to_freqs ())
    {
      static int real_values_initialized = 0;

      if (!real_values_initialized)
        {
	  real_values_initialized = 1;
	  sreal_init (&real_zero, 0, 0);
	  sreal_init (&real_one, 1, 0);
	  sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
	  sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
	  sreal_init (&real_one_half, 1, -1);
	  sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
	  sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
	}

      mark_dfs_back_edges ();

      single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))->probability =
	 REG_BR_PROB_BASE;

      /* Set up block info for each basic block.  */
      alloc_aux_for_blocks (sizeof (struct block_info_def));
      alloc_aux_for_edges (sizeof (struct edge_info_def));
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
	{
	  edge e;
	  edge_iterator ei;

	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
	      sreal_mul (&EDGE_INFO (e)->back_edge_prob,
			 &EDGE_INFO (e)->back_edge_prob,
			 &real_inv_br_prob_base);
	    }
	}

      /* First compute frequencies locally for each loop from innermost
         to outermost to examine frequencies for back edges.  */
      estimate_loops ();

      memcpy (&freq_max, &real_zero, sizeof (real_zero));
      FOR_EACH_BB_FN (bb, cfun)
	if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
	  memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));

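      /* Rescale so that the most frequent block gets frequency
	 BB_FREQ_MAX; REAL_ONE_HALF is added below to round to the
	 nearest integer.  */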
      sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
	{
	  sreal tmp;

	  sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
	  sreal_add (&tmp, &tmp, &real_one_half);
	  bb->frequency = sreal_to_int (&tmp);
	}

      free_aux_for_blocks ();
      free_aux_for_edges ();
    }
  compute_function_frequency ();
}

/* Decide whether the function is hot, cold, or unlikely executed.  */
void
compute_function_frequency (void)
{
  basic_block bb;
  struct cgraph_node *node = cgraph_get_node (current_function_decl);

  if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
      || MAIN_NAME_P (DECL_NAME (current_function_decl)))
    node->only_called_at_startup = true;
  if (DECL_STATIC_DESTRUCTOR (current_function_decl))
    node->only_called_at_exit = true;

  if (profile_status_for_fn (cfun) != PROFILE_READ)
    {
      int flags = flags_from_decl_or_type (current_function_decl);
      if (lookup_attribute ("cold", DECL_ATTRIBUTES (current_function_decl))
	  != NULL)
        node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
      else if (lookup_attribute ("hot", DECL_ATTRIBUTES (current_function_decl))
	       != NULL)
        node->frequency = NODE_FREQUENCY_HOT;
      else if (flags & ECF_NORETURN)
        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      else if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      else if (DECL_STATIC_CONSTRUCTOR (current_function_decl)
	       || DECL_STATIC_DESTRUCTOR (current_function_decl))
        node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      return;
    }

  /* Only the first time do we try to drop the function into the
     unlikely-executed category; after inlining, round-off errors may
     confuse us.  The ipa-profile pass will drop functions called only
     from unlikely functions to unlikely, and that covers most of what
     we care about.  */
  if (!cfun->after_inlining)
    node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
  FOR_EACH_BB_FN (bb, cfun)
    {
      if (maybe_hot_bb_p (cfun, bb))
	{
	  node->frequency = NODE_FREQUENCY_HOT;
	  return;
	}
      if (!probably_never_executed_bb_p (cfun, bb))
	node->frequency = NODE_FREQUENCY_NORMAL;
    }
}

static bool
gate_estimate_probability (void)
{
  return flag_guess_branch_prob;
}

/* Build PREDICT_EXPR.  */
tree
build_predict_expr (enum br_predictor predictor, enum prediction taken)
{
  tree t = build1 (PREDICT_EXPR, void_type_node,
		   build_int_cst (integer_type_node, predictor));
  SET_PREDICT_EXPR_OUTCOME (t, taken);
  return t;
}

const char *
predictor_name (enum br_predictor predictor)
{
  return predictor_info[predictor].name;
}

namespace {

const pass_data pass_data_profile =
{
  GIMPLE_PASS, /* type */
  "profile_estimate", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  true, /* has_gate */
  true, /* has_execute */
  TV_BRANCH_PROB, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_verify_ssa, /* todo_flags_finish */
};

class pass_profile : public gimple_opt_pass
{
public:
  pass_profile (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_profile, ctxt)
  {}

  /* opt_pass methods: */
  bool gate () { return gate_estimate_probability (); }
  unsigned int execute () { return tree_estimate_probability_driver (); }

}; // class pass_profile

} // anon namespace

gimple_opt_pass *
make_pass_profile (gcc::context *ctxt)
{
  return new pass_profile (ctxt);
}

namespace {

const pass_data pass_data_strip_predict_hints =
{
  GIMPLE_PASS, /* type */
  "*strip_predict_hints", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  false, /* has_gate */
  true, /* has_execute */
  TV_BRANCH_PROB, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_verify_ssa, /* todo_flags_finish */
};

class pass_strip_predict_hints : public gimple_opt_pass
{
public:
  pass_strip_predict_hints (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_strip_predict_hints, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_strip_predict_hints (m_ctxt); }
  unsigned int execute () { return strip_predict_hints (); }

}; // class pass_strip_predict_hints

} // anon namespace

gimple_opt_pass *
make_pass_strip_predict_hints (gcc::context *ctxt)
{
  return new pass_strip_predict_hints (ctxt);
}

/* Rebuild function frequencies.  Passes are in general expected to
   maintain the profile by hand; however, in some cases this is not
   possible: for example, when inlining several functions with loops,
   frequencies might run out of scale and thus need to be recomputed.  */

void
rebuild_frequencies (void)
{
  timevar_push (TV_REBUILD_FREQUENCIES);

  /* When the max bb count in the function is small, there is a higher
     chance that there were truncation errors in the integer scaling
     of counts by inlining and other optimizations.  This could lead
     to incorrect classification of code as being cold when it isn't.
     In that case, force the estimation of bb counts/frequencies from the
     branch probabilities, rather than computing frequencies from counts,
     which may also lead to frequencies incorrectly reduced to 0.  There
     is less precision in the probabilities, so we only do this for small
     max counts.  */
  gcov_type count_max = 0;
  basic_block bb;
  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
    count_max = MAX (bb->count, count_max);

  if (profile_status_for_fn (cfun) == PROFILE_GUESSED
      || (profile_status_for_fn (cfun) == PROFILE_READ
	  && count_max < REG_BR_PROB_BASE / 10))
    {
      loop_optimizer_init (0);
      add_noreturn_fake_exit_edges ();
      mark_irreducible_loops ();
      connect_infinite_loops_to_exit ();
      estimate_bb_frequencies (true);
      remove_fake_exit_edges ();
      loop_optimizer_finalize ();
    }
  else if (profile_status_for_fn (cfun) == PROFILE_READ)
    counts_to_freqs ();
  else
    gcc_unreachable ();
  timevar_pop (TV_REBUILD_FREQUENCIES);
}