1 /* The tracer pass for the GNU compiler.
2    Contributed by Jan Hubicka, SuSE Labs.
3    Copyright (C) 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
4 
5    This file is part of GCC.
6 
7    GCC is free software; you can redistribute it and/or modify it
8    under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 2, or (at your option)
10    any later version.
11 
12    GCC is distributed in the hope that it will be useful, but WITHOUT
13    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15    License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with GCC; see the file COPYING.  If not, write to the Free
19    Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20    02110-1301, USA.  */
21 
22 /* This pass performs the tail duplication needed for superblock formation.
23    For more information see:
24 
25      Design and Analysis of Profile-Based Optimization in Compaq's
26      Compilation Tools for Alpha; Journal of Instruction-Level
27      Parallelism 3 (2000) 1-25
28 
29    Unlike Compaq's implementation we don't do the loop peeling as most
30    probably a better job can be done by a special pass and we don't
31    need to worry too much about the code size implications as the tail
32    duplicates are crossjumped again if optimizations are not
33    performed.  */
34 
35 
36 #include "config.h"
37 #include "system.h"
38 #include "coretypes.h"
39 #include "tm.h"
40 #include "tree.h"
41 #include "rtl.h"
42 #include "hard-reg-set.h"
43 #include "basic-block.h"
44 #include "output.h"
45 #include "cfglayout.h"
46 #include "fibheap.h"
47 #include "flags.h"
48 #include "timevar.h"
49 #include "params.h"
50 #include "coverage.h"
51 #include "tree-pass.h"
52 
53 static int count_insns (basic_block);
54 static bool ignore_bb_p (basic_block);
55 static bool better_p (edge, edge);
56 static edge find_best_successor (basic_block);
57 static edge find_best_predecessor (basic_block);
58 static int find_trace (basic_block, basic_block *);
59 static void tail_duplicate (void);
60 static void layout_superblocks (void);
61 
62 /* Minimal outgoing edge probability considered for superblock formation.  */
63 static int probability_cutoff;
64 static int branch_ratio_cutoff;
65 
66 /* Return true if BB has been seen - it is connected to some trace
67    already.  */
68 
69 #define seen(bb) (bb->il.rtl->visited || bb->aux)
70 
71 /* Return true if we should ignore the basic block for purposes of tracing.  */
72 static bool
ignore_bb_p(basic_block bb)73 ignore_bb_p (basic_block bb)
74 {
75   if (bb->index < 0)
76     return true;
77   if (!maybe_hot_bb_p (bb))
78     return true;
79   return false;
80 }
81 
82 /* Return number of instructions in the block.  */
83 
84 static int
count_insns(basic_block bb)85 count_insns (basic_block bb)
86 {
87   rtx insn;
88   int n = 0;
89 
90   for (insn = BB_HEAD (bb);
91        insn != NEXT_INSN (BB_END (bb));
92        insn = NEXT_INSN (insn))
93     if (active_insn_p (insn))
94       n++;
95   return n;
96 }
97 
98 /* Return true if E1 is more frequent than E2.  */
99 static bool
better_p(edge e1,edge e2)100 better_p (edge e1, edge e2)
101 {
102   if (e1->count != e2->count)
103     return e1->count > e2->count;
104   if (e1->src->frequency * e1->probability !=
105       e2->src->frequency * e2->probability)
106     return (e1->src->frequency * e1->probability
107 	    > e2->src->frequency * e2->probability);
108   /* This is needed to avoid changes in the decision after
109      CFG is modified.  */
110   if (e1->src != e2->src)
111     return e1->src->index > e2->src->index;
112   return e1->dest->index > e2->dest->index;
113 }
114 
115 /* Return most frequent successor of basic block BB.  */
116 
117 static edge
find_best_successor(basic_block bb)118 find_best_successor (basic_block bb)
119 {
120   edge e;
121   edge best = NULL;
122   edge_iterator ei;
123 
124   FOR_EACH_EDGE (e, ei, bb->succs)
125     if (!best || better_p (e, best))
126       best = e;
127   if (!best || ignore_bb_p (best->dest))
128     return NULL;
129   if (best->probability <= probability_cutoff)
130     return NULL;
131   return best;
132 }
133 
134 /* Return most frequent predecessor of basic block BB.  */
135 
136 static edge
find_best_predecessor(basic_block bb)137 find_best_predecessor (basic_block bb)
138 {
139   edge e;
140   edge best = NULL;
141   edge_iterator ei;
142 
143   FOR_EACH_EDGE (e, ei, bb->preds)
144     if (!best || better_p (e, best))
145       best = e;
146   if (!best || ignore_bb_p (best->src))
147     return NULL;
148   if (EDGE_FREQUENCY (best) * REG_BR_PROB_BASE
149       < bb->frequency * branch_ratio_cutoff)
150     return NULL;
151   return best;
152 }
153 
154 /* Find the trace using bb and record it in the TRACE array.
155    Return number of basic blocks recorded.  */
156 
157 static int
find_trace(basic_block bb,basic_block * trace)158 find_trace (basic_block bb, basic_block *trace)
159 {
160   int i = 0;
161   edge e;
162 
163   if (dump_file)
164     fprintf (dump_file, "Trace seed %i [%i]", bb->index, bb->frequency);
165 
166   while ((e = find_best_predecessor (bb)) != NULL)
167     {
168       basic_block bb2 = e->src;
169       if (seen (bb2) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX))
170 	  || find_best_successor (bb2) != e)
171 	break;
172       if (dump_file)
173 	fprintf (dump_file, ",%i [%i]", bb->index, bb->frequency);
174       bb = bb2;
175     }
176   if (dump_file)
177     fprintf (dump_file, " forward %i [%i]", bb->index, bb->frequency);
178   trace[i++] = bb;
179 
180   /* Follow the trace in forward direction.  */
181   while ((e = find_best_successor (bb)) != NULL)
182     {
183       bb = e->dest;
184       if (seen (bb) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX))
185 	  || find_best_predecessor (bb) != e)
186 	break;
187       if (dump_file)
188 	fprintf (dump_file, ",%i [%i]", bb->index, bb->frequency);
189       trace[i++] = bb;
190     }
191   if (dump_file)
192     fprintf (dump_file, "\n");
193   return i;
194 }
195 
196 /* Look for basic blocks in frequency order, construct traces and tail duplicate
197    if profitable.  */
198 
199 static void
tail_duplicate(void)200 tail_duplicate (void)
201 {
202   fibnode_t *blocks = xcalloc (last_basic_block, sizeof (fibnode_t));
203   basic_block *trace = xmalloc (sizeof (basic_block) * n_basic_blocks);
204   int *counts = xmalloc (sizeof (int) * last_basic_block);
205   int ninsns = 0, nduplicated = 0;
206   gcov_type weighted_insns = 0, traced_insns = 0;
207   fibheap_t heap = fibheap_new ();
208   gcov_type cover_insns;
209   int max_dup_insns;
210   basic_block bb;
211 
212   if (profile_info && flag_branch_probabilities)
213     probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY_FEEDBACK);
214   else
215     probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY);
216   probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff;
217 
218   branch_ratio_cutoff =
219     (REG_BR_PROB_BASE / 100 * PARAM_VALUE (TRACER_MIN_BRANCH_RATIO));
220 
221   FOR_EACH_BB (bb)
222     {
223       int n = count_insns (bb);
224       if (!ignore_bb_p (bb))
225 	blocks[bb->index] = fibheap_insert (heap, -bb->frequency,
226 					    bb);
227 
228       counts [bb->index] = n;
229       ninsns += n;
230       weighted_insns += n * bb->frequency;
231     }
232 
233   if (profile_info && flag_branch_probabilities)
234     cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE_FEEDBACK);
235   else
236     cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE);
237   cover_insns = (weighted_insns * cover_insns + 50) / 100;
238   max_dup_insns = (ninsns * PARAM_VALUE (TRACER_MAX_CODE_GROWTH) + 50) / 100;
239 
240   while (traced_insns < cover_insns && nduplicated < max_dup_insns
241          && !fibheap_empty (heap))
242     {
243       basic_block bb = fibheap_extract_min (heap);
244       int n, pos;
245 
246       if (!bb)
247 	break;
248 
249       blocks[bb->index] = NULL;
250 
251       if (ignore_bb_p (bb))
252 	continue;
253       gcc_assert (!seen (bb));
254 
255       n = find_trace (bb, trace);
256 
257       bb = trace[0];
258       traced_insns += bb->frequency * counts [bb->index];
259       if (blocks[bb->index])
260 	{
261 	  fibheap_delete_node (heap, blocks[bb->index]);
262 	  blocks[bb->index] = NULL;
263 	}
264 
265       for (pos = 1; pos < n; pos++)
266 	{
267 	  basic_block bb2 = trace[pos];
268 
269 	  if (blocks[bb2->index])
270 	    {
271 	      fibheap_delete_node (heap, blocks[bb2->index]);
272 	      blocks[bb2->index] = NULL;
273 	    }
274 	  traced_insns += bb2->frequency * counts [bb2->index];
275 	  if (EDGE_COUNT (bb2->preds) > 1
276 	      && can_duplicate_block_p (bb2))
277 	    {
278 	      edge e;
279 	      basic_block old = bb2;
280 
281 	      e = find_edge (bb, bb2);
282 
283 	      nduplicated += counts [bb2->index];
284 	      bb2 = duplicate_block (bb2, e, bb);
285 
286 	      /* Reconsider the original copy of block we've duplicated.
287 	         Removing the most common predecessor may make it to be
288 	         head.  */
289 	      blocks[old->index] =
290 		fibheap_insert (heap, -old->frequency, old);
291 
292 	      if (dump_file)
293 		fprintf (dump_file, "Duplicated %i as %i [%i]\n",
294 			 old->index, bb2->index, bb2->frequency);
295 	    }
296 	  bb->aux = bb2;
297 	  bb2->il.rtl->visited = 1;
298 	  bb = bb2;
299 	  /* In case the trace became infrequent, stop duplicating.  */
300 	  if (ignore_bb_p (bb))
301 	    break;
302 	}
303       if (dump_file)
304 	fprintf (dump_file, " covered now %.1f\n\n",
305 		 traced_insns * 100.0 / weighted_insns);
306     }
307   if (dump_file)
308     fprintf (dump_file, "Duplicated %i insns (%i%%)\n", nduplicated,
309 	     nduplicated * 100 / ninsns);
310 
311   free (blocks);
312   free (trace);
313   free (counts);
314   fibheap_delete (heap);
315 }
316 
317 /* Connect the superblocks into linear sequence.  At the moment we attempt to keep
318    the original order as much as possible, but the algorithm may be made smarter
319    later if needed.  BB reordering pass should void most of the benefits of such
320    change though.  */
321 
322 static void
layout_superblocks(void)323 layout_superblocks (void)
324 {
325   basic_block end = single_succ (ENTRY_BLOCK_PTR);
326   basic_block bb = end->next_bb;
327 
328   while (bb != EXIT_BLOCK_PTR)
329     {
330       edge_iterator ei;
331       edge e, best = NULL;
332       while (end->aux)
333 	end = end->aux;
334 
335       FOR_EACH_EDGE (e, ei, end->succs)
336 	if (e->dest != EXIT_BLOCK_PTR
337 	    && e->dest != single_succ (ENTRY_BLOCK_PTR)
338 	    && !e->dest->il.rtl->visited
339 	    && (!best || EDGE_FREQUENCY (e) > EDGE_FREQUENCY (best)))
340 	  best = e;
341 
342       if (best)
343 	{
344 	  end->aux = best->dest;
345 	  best->dest->il.rtl->visited = 1;
346 	}
347       else
348 	for (; bb != EXIT_BLOCK_PTR; bb = bb->next_bb)
349 	  {
350 	    if (!bb->il.rtl->visited)
351 	      {
352 		end->aux = bb;
353 		bb->il.rtl->visited = 1;
354 		break;
355 	      }
356 	  }
357     }
358 }
359 
360 /* Main entry point to this file.  FLAGS is the set of flags to pass
361    to cfg_layout_initialize().  */
362 
363 void
tracer(unsigned int flags)364 tracer (unsigned int flags)
365 {
366   if (n_basic_blocks <= 1)
367     return;
368 
369   cfg_layout_initialize (flags);
370   mark_dfs_back_edges ();
371   if (dump_file)
372     dump_flow_info (dump_file);
373   tail_duplicate ();
374   layout_superblocks ();
375   if (dump_file)
376     dump_flow_info (dump_file);
377   cfg_layout_finalize ();
378 
379   /* Merge basic blocks in duplicated traces.  */
380   cleanup_cfg (CLEANUP_EXPENSIVE);
381 }
382 
383 static bool
gate_handle_tracer(void)384 gate_handle_tracer (void)
385 {
386   return (optimize > 0 && flag_tracer);
387 }
388 
389 /* Run tracer.  */
390 static void
rest_of_handle_tracer(void)391 rest_of_handle_tracer (void)
392 {
393   if (dump_file)
394     dump_flow_info (dump_file);
395   tracer (0);
396   cleanup_cfg (CLEANUP_EXPENSIVE);
397   reg_scan (get_insns (), max_reg_num ());
398 }
399 
400 struct tree_opt_pass pass_tracer =
401 {
402   "tracer",                             /* name */
403   gate_handle_tracer,                   /* gate */
404   rest_of_handle_tracer,                /* execute */
405   NULL,                                 /* sub */
406   NULL,                                 /* next */
407   0,                                    /* static_pass_number */
408   TV_TRACER,                            /* tv_id */
409   0,                                    /* properties_required */
410   0,                                    /* properties_provided */
411   0,                                    /* properties_destroyed */
412   0,                                    /* todo_flags_start */
413   TODO_dump_func,                       /* todo_flags_finish */
414   'T'                                   /* letter */
415 };
416 
417