1 /* Basic block reordering routines for the GNU compiler.
2    Copyright (C) 2000, 2002, 2003 Free Software Foundation, Inc.
3 
4    This file is part of GCC.
5 
6    GCC is free software; you can redistribute it and/or modify it
7    under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 2, or (at your option)
9    any later version.
10 
11    GCC is distributed in the hope that it will be useful, but WITHOUT
12    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
14    License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with GCC; see the file COPYING.  If not, write to the Free
18    Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19    02111-1307, USA.  */
20 
21 /* This (greedy) algorithm constructs traces in several rounds.
22    The construction starts from "seeds".  The seed for the first round
23    is the entry point of function.  When there are more than one seed
24    that one is selected first that has the lowest key in the heap
25    (see function bb_to_key).  Then the algorithm repeatedly adds the most
26    probable successor to the end of a trace.  Finally it connects the traces.
27 
28    There are two parameters: Branch Threshold and Exec Threshold.
29    If the edge to a successor of the actual basic block is lower than
30    Branch Threshold or the frequency of the successor is lower than
31    Exec Threshold the successor will be the seed in one of the next rounds.
32    Each round has these parameters lower than the previous one.
33    The last round has to have these parameters set to zero
34    so that the remaining blocks are picked up.
35 
36    The algorithm selects the most probable successor from all unvisited
37    successors and successors that have been added to this trace.
38    The other successors (that has not been "sent" to the next round) will be
39    other seeds for this round and the secondary traces will start in them.
40    If the successor has not been visited in this trace it is added to the trace
41    (however, there is some heuristic for simple branches).
42    If the successor has been visited in this trace the loop has been found.
43    If the loop has many iterations the loop is rotated so that the
44    source block of the most probable edge going out from the loop
45    is the last block of the trace.
46    If the loop has few iterations and there is no edge from the last block of
47    the loop going out from loop the loop header is duplicated.
48    Finally, the construction of the trace is terminated.
49 
50    When connecting traces it first checks whether there is an edge from the
51    last block of one trace to the first block of another trace.
52    When there are still some unconnected traces it checks whether there exists
53    a basic block BB such that BB is a successor of the last bb of one trace
54    and BB is a predecessor of the first block of another trace. In this case,
55    BB is duplicated and the traces are connected through this duplicate.
56    The rest of traces are simply connected so there will be a jump to the
57    beginning of the rest of trace.
58 
59 
60    References:
61 
62    "Software Trace Cache"
63    A. Ramirez, J. Larriba-Pey, C. Navarro, J. Torrellas and M. Valero; 1999
64    http://citeseer.nj.nec.com/15361.html
65 
66 */
67 
68 #include "config.h"
69 #include "system.h"
70 #include "coretypes.h"
71 #include "tm.h"
72 #include "rtl.h"
73 #include "basic-block.h"
74 #include "flags.h"
75 #include "timevar.h"
76 #include "output.h"
77 #include "cfglayout.h"
78 #include "fibheap.h"
79 #include "target.h"
80 
81 /* The number of rounds.  */
82 #define N_ROUNDS 4
83 
84 /* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE.  */
85 static int branch_threshold[N_ROUNDS] = {400, 200, 100, 0};
86 
87 /* Exec thresholds in thousandths (per mille) of the frequency of bb 0.  */
88 static int exec_threshold[N_ROUNDS] = {500, 200, 50, 0};
89 
90 /* If edge frequency is lower than DUPLICATION_THRESHOLD per mille of entry
91    block the edge destination is not duplicated while connecting traces.  */
92 #define DUPLICATION_THRESHOLD 100
93 
94 /* Length of unconditional jump instruction.  */
95 static int uncond_jump_length;
96 
97 /* Structure to hold needed information for each basic block.  */
98 typedef struct bbro_basic_block_data_def
99 {
100   /* Which trace is the bb start of (-1 means it is not a start of a trace).  */
101   int start_of_trace;
102 
103   /* Which trace is the bb end of (-1 means it is not an end of a trace).  */
104   int end_of_trace;
105 
106   /* Which heap is BB in (if any)?  */
107   fibheap_t heap;
108 
109   /* Which heap node is BB in (if any)?  */
110   fibnode_t node;
111 } bbro_basic_block_data;
112 
113 /* The current size of the following dynamic array.  */
114 static int array_size;
115 
116 /* The array which holds needed information for basic blocks.  */
117 static bbro_basic_block_data *bbd;
118 
119 /* To avoid frequent reallocation the size of arrays is greater than needed,
120    the number of elements is (not less than) 1.25 * size_wanted.  */
121 #define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5)
122 
123 /* Free the memory and set the pointer to NULL.  */
124 #define FREE(P) \
125   do { if (P) { free (P); P = 0; } else { abort (); } } while (0)
126 
127 /* Structure for holding information about a trace.  */
128 struct trace
129 {
130   /* First and last basic block of the trace.  */
131   basic_block first, last;
132 
133   /* The round of the STC creation which this trace was found in.  */
134   int round;
135 
136   /* The length (i.e. the number of basic blocks) of the trace.  */
137   int length;
138 };
139 
140 /* Maximum frequency and count of one of the entry blocks.  */
141 int max_entry_frequency;
142 gcov_type max_entry_count;
143 
144 /* Local function prototypes.  */
145 static void find_traces (int *, struct trace *);
146 static basic_block rotate_loop (edge, struct trace *, int);
147 static void mark_bb_visited (basic_block, int);
148 static void find_traces_1_round (int, int, gcov_type, struct trace *, int *,
149 				 int, fibheap_t *);
150 static basic_block copy_bb (basic_block, edge, basic_block, int);
151 static fibheapkey_t bb_to_key (basic_block);
152 static bool better_edge_p (basic_block, edge, int, int, int, int);
153 static void connect_traces (int, struct trace *);
154 static bool copy_bb_p (basic_block, int);
155 static int get_uncond_jump_length (void);
156 
157 /* Find the traces for Software Trace Cache.  Chain each trace through
158    RBI()->next.  Store the number of traces to N_TRACES and description of
159    traces to TRACES.  */
160 
161 static void
find_traces(int * n_traces,struct trace * traces)162 find_traces (int *n_traces, struct trace *traces)
163 {
164   int i;
165   edge e;
166   fibheap_t heap;
167 
168   /* Insert entry points of function into heap.  */
169   heap = fibheap_new ();
170   max_entry_frequency = 0;
171   max_entry_count = 0;
172   for (e = ENTRY_BLOCK_PTR->succ; e; e = e->succ_next)
173     {
174       bbd[e->dest->index].heap = heap;
175       bbd[e->dest->index].node = fibheap_insert (heap, bb_to_key (e->dest),
176 						    e->dest);
177       if (e->dest->frequency > max_entry_frequency)
178 	max_entry_frequency = e->dest->frequency;
179       if (e->dest->count > max_entry_count)
180 	max_entry_count = e->dest->count;
181     }
182 
183   /* Find the traces.  */
184   for (i = 0; i < N_ROUNDS; i++)
185     {
186       gcov_type count_threshold;
187 
188       if (rtl_dump_file)
189 	fprintf (rtl_dump_file, "STC - round %d\n", i + 1);
190 
191       if (max_entry_count < INT_MAX / 1000)
192 	count_threshold = max_entry_count * exec_threshold[i] / 1000;
193       else
194 	count_threshold = max_entry_count / 1000 * exec_threshold[i];
195 
196       find_traces_1_round (REG_BR_PROB_BASE * branch_threshold[i] / 1000,
197 			   max_entry_frequency * exec_threshold[i] / 1000,
198 			   count_threshold, traces, n_traces, i, &heap);
199     }
200   fibheap_delete (heap);
201 
202   if (rtl_dump_file)
203     {
204       for (i = 0; i < *n_traces; i++)
205 	{
206 	  basic_block bb;
207 	  fprintf (rtl_dump_file, "Trace %d (round %d):  ", i + 1,
208 		   traces[i].round + 1);
209 	  for (bb = traces[i].first; bb != traces[i].last; bb = bb->rbi->next)
210 	    fprintf (rtl_dump_file, "%d [%d] ", bb->index, bb->frequency);
211 	  fprintf (rtl_dump_file, "%d [%d]\n", bb->index, bb->frequency);
212 	}
213       fflush (rtl_dump_file);
214     }
215 }
216 
217 /* Rotate loop whose back edge is BACK_EDGE in the tail of trace TRACE
218    (with sequential number TRACE_N).  */
219 
220 static basic_block
rotate_loop(edge back_edge,struct trace * trace,int trace_n)221 rotate_loop (edge back_edge, struct trace *trace, int trace_n)
222 {
223   basic_block bb;
224 
225   /* Information about the best end (end after rotation) of the loop.  */
226   basic_block best_bb = NULL;
227   edge best_edge = NULL;
228   int best_freq = -1;
229   gcov_type best_count = -1;
230   /* The best edge is preferred when its destination is not visited yet
231      or is a start block of some trace.  */
232   bool is_preferred = false;
233 
234   /* Find the most frequent edge that goes out from current trace.  */
235   bb = back_edge->dest;
236   do
237     {
238       edge e;
239       for (e = bb->succ; e; e = e->succ_next)
240 	if (e->dest != EXIT_BLOCK_PTR
241 	    && e->dest->rbi->visited != trace_n
242 	    && (e->flags & EDGE_CAN_FALLTHRU)
243 	    && !(e->flags & EDGE_COMPLEX))
244 	{
245 	  if (is_preferred)
246 	    {
247 	      /* The best edge is preferred.  */
248 	      if (!e->dest->rbi->visited
249 		  || bbd[e->dest->index].start_of_trace >= 0)
250 		{
251 		  /* The current edge E is also preferred.  */
252 		  int freq = EDGE_FREQUENCY (e);
253 		  if (freq > best_freq || e->count > best_count)
254 		    {
255 		      best_freq = freq;
256 		      best_count = e->count;
257 		      best_edge = e;
258 		      best_bb = bb;
259 		    }
260 		}
261 	    }
262 	  else
263 	    {
264 	      if (!e->dest->rbi->visited
265 		  || bbd[e->dest->index].start_of_trace >= 0)
266 		{
267 		  /* The current edge E is preferred.  */
268 		  is_preferred = true;
269 		  best_freq = EDGE_FREQUENCY (e);
270 		  best_count = e->count;
271 		  best_edge = e;
272 		  best_bb = bb;
273 		}
274 	      else
275 		{
276 		  int freq = EDGE_FREQUENCY (e);
277 		  if (!best_edge || freq > best_freq || e->count > best_count)
278 		    {
279 		      best_freq = freq;
280 		      best_count = e->count;
281 		      best_edge = e;
282 		      best_bb = bb;
283 		    }
284 		}
285 	    }
286 	}
287       bb = bb->rbi->next;
288     }
289   while (bb != back_edge->dest);
290 
291   if (best_bb)
292     {
293       /* Rotate the loop so that the BEST_EDGE goes out from the last block of
294 	 the trace.  */
295       if (back_edge->dest == trace->first)
296 	{
297 	  trace->first = best_bb->rbi->next;
298 	}
299       else
300 	{
301 	  basic_block prev_bb;
302 
303 	  for (prev_bb = trace->first;
304 	       prev_bb->rbi->next != back_edge->dest;
305 	       prev_bb = prev_bb->rbi->next)
306 	    ;
307 	  prev_bb->rbi->next = best_bb->rbi->next;
308 
309 	  /* Try to get rid of uncond jump to cond jump.  */
310 	  if (prev_bb->succ && !prev_bb->succ->succ_next)
311 	    {
312 	      basic_block header = prev_bb->succ->dest;
313 
314 	      /* Duplicate HEADER if it is a small block containing cond jump
315 		 in the end.  */
316 	      if (any_condjump_p (BB_END (header)) && copy_bb_p (header, 0))
317 		{
318 		  copy_bb (header, prev_bb->succ, prev_bb, trace_n);
319 		}
320 	    }
321 	}
322     }
323   else
324     {
325       /* We have not found suitable loop tail so do no rotation.  */
326       best_bb = back_edge->src;
327     }
328   best_bb->rbi->next = NULL;
329   return best_bb;
330 }
331 
332 /* This function marks BB that it was visited in trace number TRACE.  */
333 
334 static void
mark_bb_visited(basic_block bb,int trace)335 mark_bb_visited (basic_block bb, int trace)
336 {
337   bb->rbi->visited = trace;
338   if (bbd[bb->index].heap)
339     {
340       fibheap_delete_node (bbd[bb->index].heap, bbd[bb->index].node);
341       bbd[bb->index].heap = NULL;
342       bbd[bb->index].node = NULL;
343     }
344 }
345 
346 /* One round of finding traces. Find traces for BRANCH_TH and EXEC_TH i.e. do
347    not include basic blocks their probability is lower than BRANCH_TH or their
348    frequency is lower than EXEC_TH into traces (or count is lower than
349    COUNT_TH).  It stores the new traces into TRACES and modifies the number of
350    traces *N_TRACES. Sets the round (which the trace belongs to) to ROUND. It
351    expects that starting basic blocks are in *HEAP and at the end it deletes
352    *HEAP and stores starting points for the next round into new *HEAP.  */
353 
354 static void
find_traces_1_round(int branch_th,int exec_th,gcov_type count_th,struct trace * traces,int * n_traces,int round,fibheap_t * heap)355 find_traces_1_round (int branch_th, int exec_th, gcov_type count_th,
356 		     struct trace *traces, int *n_traces, int round,
357 		     fibheap_t *heap)
358 {
359   /* Heap for discarded basic blocks which are possible starting points for
360      the next round.  */
361   fibheap_t new_heap = fibheap_new ();
362 
363   while (!fibheap_empty (*heap))
364     {
365       basic_block bb;
366       struct trace *trace;
367       edge best_edge, e;
368       fibheapkey_t key;
369 
370       bb = fibheap_extract_min (*heap);
371       bbd[bb->index].heap = NULL;
372       bbd[bb->index].node = NULL;
373 
374       if (rtl_dump_file)
375 	fprintf (rtl_dump_file, "Getting bb %d\n", bb->index);
376 
377       /* If the BB's frequency is too low send BB to the next round.  */
378       if (round < N_ROUNDS - 1
379 	  && (bb->frequency < exec_th || bb->count < count_th
380 	      || probably_never_executed_bb_p (bb)))
381 	{
382 	  int key = bb_to_key (bb);
383 	  bbd[bb->index].heap = new_heap;
384 	  bbd[bb->index].node = fibheap_insert (new_heap, key, bb);
385 
386 	  if (rtl_dump_file)
387 	    fprintf (rtl_dump_file,
388 		     "  Possible start point of next round: %d (key: %d)\n",
389 		     bb->index, key);
390 	  continue;
391 	}
392 
393       trace = traces + *n_traces;
394       trace->first = bb;
395       trace->round = round;
396       trace->length = 0;
397       (*n_traces)++;
398 
399       do
400 	{
401 	  int prob, freq;
402 
403 	  /* The probability and frequency of the best edge.  */
404 	  int best_prob = INT_MIN / 2;
405 	  int best_freq = INT_MIN / 2;
406 
407 	  best_edge = NULL;
408 	  mark_bb_visited (bb, *n_traces);
409 	  trace->length++;
410 
411 	  if (rtl_dump_file)
412 	    fprintf (rtl_dump_file, "Basic block %d was visited in trace %d\n",
413 		     bb->index, *n_traces - 1);
414 
415 	  /* Select the successor that will be placed after BB.  */
416 	  for (e = bb->succ; e; e = e->succ_next)
417 	    {
418 #ifdef ENABLE_CHECKING
419 	      if (e->flags & EDGE_FAKE)
420 		abort ();
421 #endif
422 
423 	      if (e->dest == EXIT_BLOCK_PTR)
424 		continue;
425 
426 	      if (e->dest->rbi->visited
427 		  && e->dest->rbi->visited != *n_traces)
428 		continue;
429 
430 	      prob = e->probability;
431 	      freq = EDGE_FREQUENCY (e);
432 
433 	      /* Edge that cannot be fallthru or improbable or infrequent
434 		 successor (ie. it is unsuitable successor).  */
435 	      if (!(e->flags & EDGE_CAN_FALLTHRU) || (e->flags & EDGE_COMPLEX)
436 		  || prob < branch_th || freq < exec_th || e->count < count_th)
437 		continue;
438 
439 	      if (better_edge_p (bb, e, prob, freq, best_prob, best_freq))
440 		{
441 		  best_edge = e;
442 		  best_prob = prob;
443 		  best_freq = freq;
444 		}
445 	    }
446 
447 	  /* If the best destination has multiple predecessors, and can be
448 	     duplicated cheaper than a jump, don't allow it to be added
449 	     to a trace.  We'll duplicate it when connecting traces.  */
450 	  if (best_edge && best_edge->dest->pred->pred_next
451 	      && copy_bb_p (best_edge->dest, 0))
452 	    best_edge = NULL;
453 
454 	  /* Add all non-selected successors to the heaps.  */
455 	  for (e = bb->succ; e; e = e->succ_next)
456 	    {
457 	      if (e == best_edge
458 		  || e->dest == EXIT_BLOCK_PTR
459 		  || e->dest->rbi->visited)
460 		continue;
461 
462 	      key = bb_to_key (e->dest);
463 
464 	      if (bbd[e->dest->index].heap)
465 		{
466 		  /* E->DEST is already in some heap.  */
467 		  if (key != bbd[e->dest->index].node->key)
468 		    {
469 		      if (rtl_dump_file)
470 			{
471 			  fprintf (rtl_dump_file,
472 				   "Changing key for bb %d from %ld to %ld.\n",
473 				   e->dest->index,
474 				   (long) bbd[e->dest->index].node->key,
475 				   key);
476 			}
477 		      fibheap_replace_key (bbd[e->dest->index].heap,
478 					   bbd[e->dest->index].node, key);
479 		    }
480 		}
481 	      else
482 		{
483 		  fibheap_t which_heap = *heap;
484 
485 		  prob = e->probability;
486 		  freq = EDGE_FREQUENCY (e);
487 
488 		  if (!(e->flags & EDGE_CAN_FALLTHRU)
489 		      || (e->flags & EDGE_COMPLEX)
490 		      || prob < branch_th || freq < exec_th
491 		      || e->count < count_th)
492 		    {
493 		      if (round < N_ROUNDS - 1)
494 			which_heap = new_heap;
495 		    }
496 
497 		  bbd[e->dest->index].heap = which_heap;
498 		  bbd[e->dest->index].node = fibheap_insert (which_heap,
499 								key, e->dest);
500 
501 		  if (rtl_dump_file)
502 		    {
503 		      fprintf (rtl_dump_file,
504 			       "  Possible start of %s round: %d (key: %ld)\n",
505 			       (which_heap == new_heap) ? "next" : "this",
506 			       e->dest->index, (long) key);
507 		    }
508 
509 		}
510 	    }
511 
512 	  if (best_edge) /* Suitable successor was found.  */
513 	    {
514 	      if (best_edge->dest->rbi->visited == *n_traces)
515 		{
516 		  /* We do nothing with one basic block loops.  */
517 		  if (best_edge->dest != bb)
518 		    {
519 		      if (EDGE_FREQUENCY (best_edge)
520 			  > 4 * best_edge->dest->frequency / 5)
521 			{
522 			  /* The loop has at least 4 iterations.  If the loop
523 			     header is not the first block of the function
524 			     we can rotate the loop.  */
525 
526 			  if (best_edge->dest != ENTRY_BLOCK_PTR->next_bb)
527 			    {
528 			      if (rtl_dump_file)
529 				{
530 				  fprintf (rtl_dump_file,
531 					   "Rotating loop %d - %d\n",
532 					   best_edge->dest->index, bb->index);
533 				}
534 			      bb->rbi->next = best_edge->dest;
535 			      bb = rotate_loop (best_edge, trace, *n_traces);
536 			    }
537 			}
538 		      else
539 			{
540 			  /* The loop has less than 4 iterations.  */
541 
542 			  /* Check whether there is another edge from BB.  */
543 			  edge another_edge;
544 			  for (another_edge = bb->succ;
545 			       another_edge;
546 			       another_edge = another_edge->succ_next)
547 			    if (another_edge != best_edge)
548 			      break;
549 
550 			  if (!another_edge && copy_bb_p (best_edge->dest,
551 							  !optimize_size))
552 			    {
553 			      bb = copy_bb (best_edge->dest, best_edge, bb,
554 					    *n_traces);
555 			    }
556 			}
557 		    }
558 
559 		  /* Terminate the trace.  */
560 		  break;
561 		}
562 	      else
563 		{
564 		  /* Check for a situation
565 
566 		    A
567 		   /|
568 		  B |
569 		   \|
570 		    C
571 
572 		  where
573 		  EDGE_FREQUENCY (AB) + EDGE_FREQUENCY (BC)
574 		    >= EDGE_FREQUENCY (AC).
575 		  (i.e. 2 * B->frequency >= EDGE_FREQUENCY (AC) )
576 		  Best ordering is then A B C.
577 
578 		  This situation is created for example by:
579 
580 		  if (A) B;
581 		  C;
582 
583 		  */
584 
585 		  for (e = bb->succ; e; e = e->succ_next)
586 		    if (e != best_edge
587 			&& (e->flags & EDGE_CAN_FALLTHRU)
588 			&& !(e->flags & EDGE_COMPLEX)
589 			&& !e->dest->rbi->visited
590 			&& !e->dest->pred->pred_next
591 			&& e->dest->succ
592 			&& (e->dest->succ->flags & EDGE_CAN_FALLTHRU)
593 			&& !(e->dest->succ->flags & EDGE_COMPLEX)
594 			&& !e->dest->succ->succ_next
595 			&& e->dest->succ->dest == best_edge->dest
596 			&& 2 * e->dest->frequency >= EDGE_FREQUENCY (best_edge))
597 		      {
598 			best_edge = e;
599 			if (rtl_dump_file)
600 			  fprintf (rtl_dump_file, "Selecting BB %d\n",
601 				   best_edge->dest->index);
602 			break;
603 		      }
604 
605 		  bb->rbi->next = best_edge->dest;
606 		  bb = best_edge->dest;
607 		}
608 	    }
609 	}
610       while (best_edge);
611       trace->last = bb;
612       bbd[trace->first->index].start_of_trace = *n_traces - 1;
613       bbd[trace->last->index].end_of_trace = *n_traces - 1;
614 
615       /* The trace is terminated so we have to recount the keys in heap
616 	 (some block can have a lower key because now one of its predecessors
617 	 is an end of the trace).  */
618       for (e = bb->succ; e; e = e->succ_next)
619 	{
620 	  if (e->dest == EXIT_BLOCK_PTR
621 	      || e->dest->rbi->visited)
622 	    continue;
623 
624 	  if (bbd[e->dest->index].heap)
625 	    {
626 	      key = bb_to_key (e->dest);
627 	      if (key != bbd[e->dest->index].node->key)
628 		{
629 		  if (rtl_dump_file)
630 		    {
631 		      fprintf (rtl_dump_file,
632 			       "Changing key for bb %d from %ld to %ld.\n",
633 			       e->dest->index,
634 			       (long) bbd[e->dest->index].node->key, key);
635 		    }
636 		  fibheap_replace_key (bbd[e->dest->index].heap,
637 				       bbd[e->dest->index].node,
638 				       key);
639 		}
640 	    }
641 	}
642     }
643 
644   fibheap_delete (*heap);
645 
646   /* "Return" the new heap.  */
647   *heap = new_heap;
648 }
649 
650 /* Create a duplicate of the basic block OLD_BB and redirect edge E to it, add
651    it to trace after BB, mark OLD_BB visited and update pass' data structures
652    (TRACE is a number of trace which OLD_BB is duplicated to).  */
653 
654 static basic_block
copy_bb(basic_block old_bb,edge e,basic_block bb,int trace)655 copy_bb (basic_block old_bb, edge e, basic_block bb, int trace)
656 {
657   basic_block new_bb;
658 
659   new_bb = cfg_layout_duplicate_bb (old_bb, e);
660   if (e->dest != new_bb)
661     abort ();
662   if (e->dest->rbi->visited)
663     abort ();
664   if (rtl_dump_file)
665     fprintf (rtl_dump_file,
666 	     "Duplicated bb %d (created bb %d)\n",
667 	     old_bb->index, new_bb->index);
668   new_bb->rbi->visited = trace;
669   new_bb->rbi->next = bb->rbi->next;
670   bb->rbi->next = new_bb;
671 
672   if (new_bb->index >= array_size || last_basic_block > array_size)
673     {
674       int i;
675       int new_size;
676 
677       new_size = MAX (last_basic_block, new_bb->index + 1);
678       new_size = GET_ARRAY_SIZE (new_size);
679       bbd = xrealloc (bbd, new_size * sizeof (bbro_basic_block_data));
680       for (i = array_size; i < new_size; i++)
681 	{
682 	  bbd[i].start_of_trace = -1;
683 	  bbd[i].end_of_trace = -1;
684 	  bbd[i].heap = NULL;
685 	  bbd[i].node = NULL;
686 	}
687       array_size = new_size;
688 
689       if (rtl_dump_file)
690 	{
691 	  fprintf (rtl_dump_file,
692 		   "Growing the dynamic array to %d elements.\n",
693 		   array_size);
694 	}
695     }
696 
697   return new_bb;
698 }
699 
700 /* Compute and return the key (for the heap) of the basic block BB.  */
701 
702 static fibheapkey_t
bb_to_key(basic_block bb)703 bb_to_key (basic_block bb)
704 {
705   edge e;
706 
707   int priority = 0;
708 
709   /* Do not start in probably never executed blocks.  */
710   if (probably_never_executed_bb_p (bb))
711     return BB_FREQ_MAX;
712 
713   /* Prefer blocks whose predecessor is an end of some trace
714      or whose predecessor edge is EDGE_DFS_BACK.  */
715   for (e = bb->pred; e; e = e->pred_next)
716     {
717       if ((e->src != ENTRY_BLOCK_PTR && bbd[e->src->index].end_of_trace >= 0)
718 	  || (e->flags & EDGE_DFS_BACK))
719 	{
720 	  int edge_freq = EDGE_FREQUENCY (e);
721 
722 	  if (edge_freq > priority)
723 	    priority = edge_freq;
724 	}
725     }
726 
727   if (priority)
728     /* The block with priority should have significantly lower key.  */
729     return -(100 * BB_FREQ_MAX + 100 * priority + bb->frequency);
730   return -bb->frequency;
731 }
732 
733 /* Return true when the edge E from basic block BB is better than the temporary
734    best edge (details are in function).  The probability of edge E is PROB. The
735    frequency of the successor is FREQ.  The current best probability is
736    BEST_PROB, the best frequency is BEST_FREQ.
737    The edge is considered to be equivalent when PROB does not differ much from
738    BEST_PROB; similarly for frequency.  */
739 
740 static bool
better_edge_p(basic_block bb,edge e,int prob,int freq,int best_prob,int best_freq)741 better_edge_p (basic_block bb, edge e, int prob, int freq, int best_prob,
742 	       int best_freq)
743 {
744   bool is_better_edge;
745 
746   /* The BEST_* values do not have to be best, but can be a bit smaller than
747      maximum values.  */
748   int diff_prob = best_prob / 10;
749   int diff_freq = best_freq / 10;
750 
751   if (prob > best_prob + diff_prob)
752     /* The edge has higher probability than the temporary best edge.  */
753     is_better_edge = true;
754   else if (prob < best_prob - diff_prob)
755     /* The edge has lower probability than the temporary best edge.  */
756     is_better_edge = false;
757   else if (freq < best_freq - diff_freq)
758     /* The edge and the temporary best edge  have almost equivalent
759        probabilities.  The higher frequency of a successor now means
760        that there is another edge going into that successor.
761        This successor has lower frequency so it is better.  */
762     is_better_edge = true;
763   else if (freq > best_freq + diff_freq)
764     /* This successor has higher frequency so it is worse.  */
765     is_better_edge = false;
766   else if (e->dest->prev_bb == bb)
767     /* The edges have equivalent probabilities and the successors
768        have equivalent frequencies.  Select the previous successor.  */
769     is_better_edge = true;
770   else
771     is_better_edge = false;
772 
773   return is_better_edge;
774 }
775 
776 /* Connect traces in array TRACES, N_TRACES is the count of traces.  */
777 
778 static void
connect_traces(int n_traces,struct trace * traces)779 connect_traces (int n_traces, struct trace *traces)
780 {
781   int i;
782   bool *connected;
783   int last_trace;
784   int freq_threshold;
785   gcov_type count_threshold;
786 
787   freq_threshold = max_entry_frequency * DUPLICATION_THRESHOLD / 1000;
788   if (max_entry_count < INT_MAX / 1000)
789     count_threshold = max_entry_count * DUPLICATION_THRESHOLD / 1000;
790   else
791     count_threshold = max_entry_count / 1000 * DUPLICATION_THRESHOLD;
792 
793   connected = xcalloc (n_traces, sizeof (bool));
794   last_trace = -1;
795   for (i = 0; i < n_traces; i++)
796     {
797       int t = i;
798       int t2;
799       edge e, best;
800       int best_len;
801 
802       if (connected[t])
803 	continue;
804 
805       connected[t] = true;
806 
807       /* Find the predecessor traces.  */
808       for (t2 = t; t2 > 0;)
809 	{
810 	  best = NULL;
811 	  best_len = 0;
812 	  for (e = traces[t2].first->pred; e; e = e->pred_next)
813 	    {
814 	      int si = e->src->index;
815 
816 	      if (e->src != ENTRY_BLOCK_PTR
817 		  && (e->flags & EDGE_CAN_FALLTHRU)
818 		  && !(e->flags & EDGE_COMPLEX)
819 		  && bbd[si].end_of_trace >= 0
820 		  && !connected[bbd[si].end_of_trace]
821 		  && (!best
822 		      || e->probability > best->probability
823 		      || (e->probability == best->probability
824 			  && traces[bbd[si].end_of_trace].length > best_len)))
825 		{
826 		  best = e;
827 		  best_len = traces[bbd[si].end_of_trace].length;
828 		}
829 	    }
830 	  if (best)
831 	    {
832 	      best->src->rbi->next = best->dest;
833 	      t2 = bbd[best->src->index].end_of_trace;
834 	      connected[t2] = true;
835 	      if (rtl_dump_file)
836 		{
837 		  fprintf (rtl_dump_file, "Connection: %d %d\n",
838 			   best->src->index, best->dest->index);
839 		}
840 	    }
841 	  else
842 	    break;
843 	}
844 
845       if (last_trace >= 0)
846 	traces[last_trace].last->rbi->next = traces[t2].first;
847       last_trace = t;
848 
849       /* Find the successor traces.  */
850       while (1)
851 	{
852 	  /* Find the continuation of the chain.  */
853 	  best = NULL;
854 	  best_len = 0;
855 	  for (e = traces[t].last->succ; e; e = e->succ_next)
856 	    {
857 	      int di = e->dest->index;
858 
859 	      if (e->dest != EXIT_BLOCK_PTR
860 		  && (e->flags & EDGE_CAN_FALLTHRU)
861 		  && !(e->flags & EDGE_COMPLEX)
862 		  && bbd[di].start_of_trace >= 0
863 		  && !connected[bbd[di].start_of_trace]
864 		  && (!best
865 		      || e->probability > best->probability
866 		      || (e->probability == best->probability
867 			  && traces[bbd[di].start_of_trace].length > best_len)))
868 		{
869 		  best = e;
870 		  best_len = traces[bbd[di].start_of_trace].length;
871 		}
872 	    }
873 
874 	  if (best)
875 	    {
876 	      if (rtl_dump_file)
877 		{
878 		  fprintf (rtl_dump_file, "Connection: %d %d\n",
879 			   best->src->index, best->dest->index);
880 		}
881 	      t = bbd[best->dest->index].start_of_trace;
882 	      traces[last_trace].last->rbi->next = traces[t].first;
883 	      connected[t] = true;
884 	      last_trace = t;
885 	    }
886 	  else
887 	    {
888 	      /* Try to connect the traces by duplication of 1 block.  */
889 	      edge e2;
890 	      basic_block next_bb = NULL;
891 	      bool try_copy = false;
892 
893 	      for (e = traces[t].last->succ; e; e = e->succ_next)
894 		if (e->dest != EXIT_BLOCK_PTR
895 		    && (e->flags & EDGE_CAN_FALLTHRU)
896 		    && !(e->flags & EDGE_COMPLEX)
897 		    && (!best || e->probability > best->probability))
898 		  {
899 		    edge best2 = NULL;
900 		    int best2_len = 0;
901 
902 		    /* If the destination is a start of a trace which is only
903 		       one block long, then no need to search the successor
904 		       blocks of the trace.  Accept it.  */
905 		    if (bbd[e->dest->index].start_of_trace >= 0
906 			&& traces[bbd[e->dest->index].start_of_trace].length
907 			   == 1)
908 		      {
909 			best = e;
910 			try_copy = true;
911 			continue;
912 		      }
913 
914 		    for (e2 = e->dest->succ; e2; e2 = e2->succ_next)
915 		      {
916 			int di = e2->dest->index;
917 
918 			if (e2->dest == EXIT_BLOCK_PTR
919 			    || ((e2->flags & EDGE_CAN_FALLTHRU)
920 				&& !(e2->flags & EDGE_COMPLEX)
921 				&& bbd[di].start_of_trace >= 0
922 				&& !connected[bbd[di].start_of_trace]
923 				&& (EDGE_FREQUENCY (e2) >= freq_threshold)
924 				&& (e2->count >= count_threshold)
925 				&& (!best2
926 				    || e2->probability > best2->probability
927 				    || (e2->probability == best2->probability
928 					&& traces[bbd[di].start_of_trace].length
929 					   > best2_len))))
930 			  {
931 			    best = e;
932 			    best2 = e2;
933 			    if (e2->dest != EXIT_BLOCK_PTR)
934 			      best2_len = traces[bbd[di].start_of_trace].length;
935 			    else
936 			      best2_len = INT_MAX;
937 			    next_bb = e2->dest;
938 			    try_copy = true;
939 			  }
940 		      }
941 		  }
942 
943 	      /* Copy tiny blocks always; copy larger blocks only when the
944 		 edge is traversed frequently enough.  */
945 	      if (try_copy
946 		  && copy_bb_p (best->dest,
947 				!optimize_size
948 				&& EDGE_FREQUENCY (best) >= freq_threshold
949 				&& best->count >= count_threshold))
950 		{
951 		  basic_block new_bb;
952 
953 		  if (rtl_dump_file)
954 		    {
955 		      fprintf (rtl_dump_file, "Connection: %d %d ",
956 			       traces[t].last->index, best->dest->index);
957 		      if (!next_bb)
958 			fputc ('\n', rtl_dump_file);
959 		      else if (next_bb == EXIT_BLOCK_PTR)
960 			fprintf (rtl_dump_file, "exit\n");
961 		      else
962 			fprintf (rtl_dump_file, "%d\n", next_bb->index);
963 		    }
964 
965 		  new_bb = copy_bb (best->dest, best, traces[t].last, t);
966 		  traces[t].last = new_bb;
967 		  if (next_bb && next_bb != EXIT_BLOCK_PTR)
968 		    {
969 		      t = bbd[next_bb->index].start_of_trace;
970 		      traces[last_trace].last->rbi->next = traces[t].first;
971 		      connected[t] = true;
972 		      last_trace = t;
973 		    }
974 		  else
975 		    break;	/* Stop finding the successor traces.  */
976 		}
977 	      else
978 		break;	/* Stop finding the successor traces.  */
979 	    }
980 	}
981     }
982 
983   if (rtl_dump_file)
984     {
985       basic_block bb;
986 
987       fprintf (rtl_dump_file, "Final order:\n");
988       for (bb = traces[0].first; bb; bb = bb->rbi->next)
989 	fprintf (rtl_dump_file, "%d ", bb->index);
990       fprintf (rtl_dump_file, "\n");
991       fflush (rtl_dump_file);
992     }
993 
994   FREE (connected);
995 }
996 
997 /* Return true when BB can and should be copied. CODE_MAY_GROW is true
998    when code size is allowed to grow by duplication.  */
999 
1000 static bool
copy_bb_p(basic_block bb,int code_may_grow)1001 copy_bb_p (basic_block bb, int code_may_grow)
1002 {
1003   int size = 0;
1004   int max_size = uncond_jump_length;
1005   rtx insn;
1006   int n_succ;
1007   edge e;
1008 
1009   if (!bb->frequency)
1010     return false;
1011   if (!bb->pred || !bb->pred->pred_next)
1012     return false;
1013   if (!cfg_layout_can_duplicate_bb_p (bb))
1014     return false;
1015 
1016   /* Avoid duplicating blocks which have many successors (PR/13430).  */
1017   n_succ = 0;
1018   for (e = bb->succ; e; e = e->succ_next)
1019     {
1020       n_succ++;
1021       if (n_succ > 8)
1022 	return false;
1023     }
1024 
1025   if (code_may_grow && maybe_hot_bb_p (bb))
1026     max_size *= 8;
1027 
1028   for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
1029        insn = NEXT_INSN (insn))
1030     {
1031       if (INSN_P (insn))
1032 	size += get_attr_length (insn);
1033     }
1034 
1035   if (size <= max_size)
1036     return true;
1037 
1038   if (rtl_dump_file)
1039     {
1040       fprintf (rtl_dump_file,
1041 	       "Block %d can't be copied because its size = %d.\n",
1042 	       bb->index, size);
1043     }
1044 
1045   return false;
1046 }
1047 
1048 /* Return the length of unconditional jump instruction.  */
1049 
1050 static int
get_uncond_jump_length(void)1051 get_uncond_jump_length (void)
1052 {
1053   rtx label, jump;
1054   int length;
1055 
1056   label = emit_label_before (gen_label_rtx (), get_insns ());
1057   jump = emit_jump_insn (gen_jump (label));
1058 
1059   length = get_attr_length (jump);
1060 
1061   delete_insn (jump);
1062   delete_insn (label);
1063   return length;
1064 }
1065 
1066 /* Reorder basic blocks.  The main entry point to this file.  FLAGS is
1067    the set of flags to pass to cfg_layout_initialize().  */
1068 
1069 void
reorder_basic_blocks(unsigned int flags)1070 reorder_basic_blocks (unsigned int flags)
1071 {
1072   int n_traces;
1073   int i;
1074   struct trace *traces;
1075 
1076   if (n_basic_blocks <= 1)
1077     return;
1078 
1079   if ((* targetm.cannot_modify_jumps_p) ())
1080     return;
1081 
1082   timevar_push (TV_REORDER_BLOCKS);
1083 
1084   cfg_layout_initialize (flags);
1085 
1086   set_edge_can_fallthru_flag ();
1087   mark_dfs_back_edges ();
1088 
1089   /* We are estimating the length of uncond jump insn only once since the code
1090      for getting the insn length always returns the minimal length now.  */
1091   if (uncond_jump_length == 0)
1092     uncond_jump_length = get_uncond_jump_length ();
1093 
1094   /* We need to know some information for each basic block.  */
1095   array_size = GET_ARRAY_SIZE (last_basic_block);
1096   bbd = xmalloc (array_size * sizeof (bbro_basic_block_data));
1097   for (i = 0; i < array_size; i++)
1098     {
1099       bbd[i].start_of_trace = -1;
1100       bbd[i].end_of_trace = -1;
1101       bbd[i].heap = NULL;
1102       bbd[i].node = NULL;
1103     }
1104 
1105   traces = xmalloc (n_basic_blocks * sizeof (struct trace));
1106   n_traces = 0;
1107   find_traces (&n_traces, traces);
1108   connect_traces (n_traces, traces);
1109   FREE (traces);
1110   FREE (bbd);
1111 
1112   if (rtl_dump_file)
1113     dump_flow_info (rtl_dump_file);
1114 
1115   cfg_layout_finalize ();
1116 
1117   timevar_pop (TV_REORDER_BLOCKS);
1118 }
1119