1 /* Basic block reordering routines for the GNU compiler.
2    Copyright (C) 2000, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3 
4    This file is part of GCC.
5 
6    GCC is free software; you can redistribute it and/or modify it
7    under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 2, or (at your option)
9    any later version.
10 
11    GCC is distributed in the hope that it will be useful, but WITHOUT
12    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
14    License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with GCC; see the file COPYING.  If not, write to the Free
18    Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
19    02110-1301, USA.  */
20 
/* This (greedy) algorithm constructs traces in several rounds.
   The construction starts from "seeds".  The seed for the first round
   is the entry point of the function.  When there is more than one seed,
   the one with the lowest key in the heap is selected first (see the
   function bb_to_key).  Then the algorithm repeatedly adds the most
   probable successor to the end of a trace.  Finally it connects the traces.

   There are two parameters: Branch Threshold and Exec Threshold.
   If the probability of an edge to a successor of the current basic block is
   lower than Branch Threshold, or the frequency of the successor is lower
   than Exec Threshold, the successor will be a seed in one of the next rounds.
   Each round has these parameters lower than the previous one.
   The last round has to have these parameters set to zero
   so that the remaining blocks are picked up.

   The algorithm selects the most probable successor from all unvisited
   successors and successors that have been added to this trace.
   The other successors (that have not been "sent" to the next round) will be
   other seeds for this round and the secondary traces will start from them.
   If the successor has not been visited in this trace, it is added to the
   trace (however, there is some heuristic for simple branches).
   If the successor has been visited in this trace, a loop has been found.
   If the loop has many iterations, the loop is rotated so that the
   source block of the most probable edge going out of the loop
   is the last block of the trace.
   If the loop has few iterations and there is no edge from the last block of
   the loop going out of the loop, the loop header is duplicated.
   Finally, the construction of the trace is terminated.

   When connecting traces, it first checks whether there is an edge from the
   last block of one trace to the first block of another trace.
   When there are still some unconnected traces, it checks whether there exists
   a basic block BB such that BB is a successor of the last bb of one trace
   and BB is a predecessor of the first block of another trace.  In this case,
   BB is duplicated and the traces are connected through this duplicate.
   The remaining traces are simply concatenated, so there will be a jump to
   the beginning of each such trace.


   References:

   "Software Trace Cache"
   A. Ramirez, J. Larriba-Pey, C. Navarro, J. Torrellas and M. Valero; 1999
   http://citeseer.nj.nec.com/15361.html

*/
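
/* A small, purely illustrative example (a sketch assuming the default
   thresholds defined below, not taken from real compiler output): consider
   a diamond-shaped CFG A -> B, A -> C, B -> D, C -> D, where the edge A->B
   has probability 80% and A->C has probability 20%.  Starting from the
   seed A, the first round grows the trace A B D; C falls below both the
   branch and the exec threshold, so it is pushed onto the heap of a later
   round and ends up as a one-block trace of its own.  Connecting the traces
   then simply places C after A B D, so the final layout is A B D C.  */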
67 
68 #include "config.h"
69 #include "system.h"
70 #include "coretypes.h"
71 #include "tm.h"
72 #include "rtl.h"
73 #include "regs.h"
74 #include "flags.h"
75 #include "timevar.h"
76 #include "output.h"
77 #include "cfglayout.h"
78 #include "fibheap.h"
79 #include "target.h"
80 #include "function.h"
81 #include "tm_p.h"
82 #include "obstack.h"
83 #include "expr.h"
84 #include "params.h"
85 #include "toplev.h"
86 #include "tree-pass.h"
87 
88 #ifndef HAVE_conditional_execution
89 #define HAVE_conditional_execution 0
90 #endif
91 
/* The number of rounds.  In most cases there will only be 4 rounds, but
   when partitioning hot and cold basic blocks into separate sections of
   the .o file there will be an extra round.  */
95 #define N_ROUNDS 5
96 
/* Stubs in case we don't have a return insn.
   We have to check at runtime too, not only at compile time.  */
99 
100 #ifndef HAVE_return
101 #define HAVE_return 0
102 #define gen_return() NULL_RTX
103 #endif
104 
105 
106 /* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE.  */
107 static int branch_threshold[N_ROUNDS] = {400, 200, 100, 0, 0};
108 
109 /* Exec thresholds in thousandths (per mille) of the frequency of bb 0.  */
110 static int exec_threshold[N_ROUNDS] = {500, 200, 50, 0, 0};
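
/* For example, with REG_BR_PROB_BASE being 10000, the round-0 branch
   threshold of 400 per mille corresponds to an edge probability of
   10000 * 400 / 1000 == 4000, i.e. 40%, and the round-0 exec threshold of
   500 per mille keeps only blocks whose frequency is at least half of the
   hottest entry block's frequency.  This is just a worked illustration of
   the scaling performed in find_traces and find_traces_1_round.  */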
111 
112 /* If edge frequency is lower than DUPLICATION_THRESHOLD per mille of entry
113    block the edge destination is not duplicated while connecting traces.  */
114 #define DUPLICATION_THRESHOLD 100
115 
116 /* Length of unconditional jump instruction.  */
117 static int uncond_jump_length;
118 
119 /* Structure to hold needed information for each basic block.  */
120 typedef struct bbro_basic_block_data_def
121 {
122   /* Which trace is the bb start of (-1 means it is not a start of a trace).  */
123   int start_of_trace;
124 
125   /* Which trace is the bb end of (-1 means it is not an end of a trace).  */
126   int end_of_trace;
127 
128   /* Which trace is the bb in?  */
129   int in_trace;
130 
131   /* Which heap is BB in (if any)?  */
132   fibheap_t heap;
133 
134   /* Which heap node is BB in (if any)?  */
135   fibnode_t node;
136 } bbro_basic_block_data;
137 
138 /* The current size of the following dynamic array.  */
139 static int array_size;
140 
141 /* The array which holds needed information for basic blocks.  */
142 static bbro_basic_block_data *bbd;
143 
/* To avoid frequent reallocation the size of the arrays is greater than
   needed; the number of elements is (not less than) 1.25 * size_wanted.  */
146 #define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5)
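
/* For example, GET_ARRAY_SIZE (10) expands to ((10 / 4) + 1) * 5 == 15,
   comfortably above 1.25 * 10; in general the macro yields
   5 * (X / 4 + 1) >= 1.25 * X for any non-negative X.  */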
147 
148 /* Free the memory and set the pointer to NULL.  */
149 #define FREE(P) (gcc_assert (P), free (P), P = 0)
150 
151 /* Structure for holding information about a trace.  */
152 struct trace
153 {
154   /* First and last basic block of the trace.  */
155   basic_block first, last;
156 
  /* The round of STC creation in which this trace was found.  */
158   int round;
159 
160   /* The length (i.e. the number of basic blocks) of the trace.  */
161   int length;
162 };
163 
164 /* Maximum frequency and count of one of the entry blocks.  */
165 static int max_entry_frequency;
166 static gcov_type max_entry_count;
167 
168 /* Local function prototypes.  */
169 static void find_traces (int *, struct trace *);
170 static basic_block rotate_loop (edge, struct trace *, int);
171 static void mark_bb_visited (basic_block, int);
172 static void find_traces_1_round (int, int, gcov_type, struct trace *, int *,
173 				 int, fibheap_t *, int);
174 static basic_block copy_bb (basic_block, edge, basic_block, int);
175 static fibheapkey_t bb_to_key (basic_block);
176 static bool better_edge_p (basic_block, edge, int, int, int, int, edge);
177 static void connect_traces (int, struct trace *);
178 static bool copy_bb_p (basic_block, int);
179 static int get_uncond_jump_length (void);
180 static bool push_to_next_round_p (basic_block, int, int, int, gcov_type);
181 static void find_rarely_executed_basic_blocks_and_crossing_edges (edge *,
182 								  int *,
183 								  int *);
184 static void add_labels_and_missing_jumps (edge *, int);
185 static void add_reg_crossing_jump_notes (void);
186 static void fix_up_fall_thru_edges (void);
187 static void fix_edges_for_rarely_executed_code (edge *, int);
188 static void fix_crossing_conditional_branches (void);
189 static void fix_crossing_unconditional_branches (void);
190 
191 /* Check to see if bb should be pushed into the next round of trace
192    collections or not.  Reasons for pushing the block forward are 1).
193    If the block is cold, we are doing partitioning, and there will be
194    another round (cold partition blocks are not supposed to be
195    collected into traces until the very last round); or 2). There will
196    be another round, and the basic block is not "hot enough" for the
197    current round of trace collection.  */
198 
199 static bool
push_to_next_round_p (basic_block bb, int round, int number_of_rounds,
201 		      int exec_th, gcov_type count_th)
202 {
203   bool there_exists_another_round;
204   bool block_not_hot_enough;
205 
206   there_exists_another_round = round < number_of_rounds - 1;
207 
208   block_not_hot_enough = (bb->frequency < exec_th
209 			  || bb->count < count_th
210 			  || probably_never_executed_bb_p (bb));
211 
212   if (there_exists_another_round
213       && block_not_hot_enough)
214     return true;
215   else
216     return false;
217 }
218 
/* Find the traces for Software Trace Cache.  Chain the basic blocks of each
   trace through their aux fields.  Store the number of traces in *N_TRACES
   and the descriptions of the traces in TRACES.  */
222 
223 static void
find_traces (int *n_traces, struct trace *traces)
225 {
226   int i;
227   int number_of_rounds;
228   edge e;
229   edge_iterator ei;
230   fibheap_t heap;
231 
232   /* Add one extra round of trace collection when partitioning hot/cold
233      basic blocks into separate sections.  The last round is for all the
234      cold blocks (and ONLY the cold blocks).  */
235 
236   number_of_rounds = N_ROUNDS - 1;
237 
238   /* Insert entry points of function into heap.  */
239   heap = fibheap_new ();
240   max_entry_frequency = 0;
241   max_entry_count = 0;
242   FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
243     {
244       bbd[e->dest->index].heap = heap;
245       bbd[e->dest->index].node = fibheap_insert (heap, bb_to_key (e->dest),
246 						    e->dest);
247       if (e->dest->frequency > max_entry_frequency)
248 	max_entry_frequency = e->dest->frequency;
249       if (e->dest->count > max_entry_count)
250 	max_entry_count = e->dest->count;
251     }
252 
253   /* Find the traces.  */
254   for (i = 0; i < number_of_rounds; i++)
255     {
256       gcov_type count_threshold;
257 
258       if (dump_file)
259 	fprintf (dump_file, "STC - round %d\n", i + 1);
260 
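      /* Compute the count threshold; multiply before dividing when the
         product cannot overflow, otherwise divide first at the cost of a
         little precision.  */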
261       if (max_entry_count < INT_MAX / 1000)
262 	count_threshold = max_entry_count * exec_threshold[i] / 1000;
263       else
264 	count_threshold = max_entry_count / 1000 * exec_threshold[i];
265 
266       find_traces_1_round (REG_BR_PROB_BASE * branch_threshold[i] / 1000,
267 			   max_entry_frequency * exec_threshold[i] / 1000,
268 			   count_threshold, traces, n_traces, i, &heap,
269 			   number_of_rounds);
270     }
271   fibheap_delete (heap);
272 
273   if (dump_file)
274     {
275       for (i = 0; i < *n_traces; i++)
276 	{
277 	  basic_block bb;
278 	  fprintf (dump_file, "Trace %d (round %d):  ", i + 1,
279 		   traces[i].round + 1);
280 	  for (bb = traces[i].first; bb != traces[i].last; bb = bb->aux)
281 	    fprintf (dump_file, "%d [%d] ", bb->index, bb->frequency);
282 	  fprintf (dump_file, "%d [%d]\n", bb->index, bb->frequency);
283 	}
284       fflush (dump_file);
285     }
286 }
287 
/* Rotate the loop, whose back edge is BACK_EDGE, at the tail of trace TRACE
   (with sequential number TRACE_N).  */
290 
291 static basic_block
rotate_loop (edge back_edge, struct trace *trace, int trace_n)
293 {
294   basic_block bb;
295 
296   /* Information about the best end (end after rotation) of the loop.  */
297   basic_block best_bb = NULL;
298   edge best_edge = NULL;
299   int best_freq = -1;
300   gcov_type best_count = -1;
301   /* The best edge is preferred when its destination is not visited yet
302      or is a start block of some trace.  */
303   bool is_preferred = false;
304 
305   /* Find the most frequent edge that goes out from current trace.  */
306   bb = back_edge->dest;
307   do
308     {
309       edge e;
310       edge_iterator ei;
311 
312       FOR_EACH_EDGE (e, ei, bb->succs)
313 	if (e->dest != EXIT_BLOCK_PTR
314 	    && e->dest->il.rtl->visited != trace_n
315 	    && (e->flags & EDGE_CAN_FALLTHRU)
316 	    && !(e->flags & EDGE_COMPLEX))
317 	{
318 	  if (is_preferred)
319 	    {
320 	      /* The best edge is preferred.  */
321 	      if (!e->dest->il.rtl->visited
322 		  || bbd[e->dest->index].start_of_trace >= 0)
323 		{
324 		  /* The current edge E is also preferred.  */
325 		  int freq = EDGE_FREQUENCY (e);
326 		  if (freq > best_freq || e->count > best_count)
327 		    {
328 		      best_freq = freq;
329 		      best_count = e->count;
330 		      best_edge = e;
331 		      best_bb = bb;
332 		    }
333 		}
334 	    }
335 	  else
336 	    {
337 	      if (!e->dest->il.rtl->visited
338 		  || bbd[e->dest->index].start_of_trace >= 0)
339 		{
340 		  /* The current edge E is preferred.  */
341 		  is_preferred = true;
342 		  best_freq = EDGE_FREQUENCY (e);
343 		  best_count = e->count;
344 		  best_edge = e;
345 		  best_bb = bb;
346 		}
347 	      else
348 		{
349 		  int freq = EDGE_FREQUENCY (e);
350 		  if (!best_edge || freq > best_freq || e->count > best_count)
351 		    {
352 		      best_freq = freq;
353 		      best_count = e->count;
354 		      best_edge = e;
355 		      best_bb = bb;
356 		    }
357 		}
358 	    }
359 	}
360       bb = bb->aux;
361     }
362   while (bb != back_edge->dest);
363 
364   if (best_bb)
365     {
366       /* Rotate the loop so that the BEST_EDGE goes out from the last block of
367 	 the trace.  */
368       if (back_edge->dest == trace->first)
369 	{
370 	  trace->first = best_bb->aux;
371 	}
372       else
373 	{
374 	  basic_block prev_bb;
375 
376 	  for (prev_bb = trace->first;
377 	       prev_bb->aux != back_edge->dest;
378 	       prev_bb = prev_bb->aux)
379 	    ;
380 	  prev_bb->aux = best_bb->aux;
381 
382 	  /* Try to get rid of uncond jump to cond jump.  */
383 	  if (single_succ_p (prev_bb))
384 	    {
385 	      basic_block header = single_succ (prev_bb);
386 
387 	      /* Duplicate HEADER if it is a small block containing cond jump
388 		 in the end.  */
389 	      if (any_condjump_p (BB_END (header)) && copy_bb_p (header, 0)
390 		  && !find_reg_note (BB_END (header), REG_CROSSING_JUMP,
391 				     NULL_RTX))
392 		copy_bb (header, single_succ_edge (prev_bb), prev_bb, trace_n);
393 	    }
394 	}
395     }
396   else
397     {
      /* We have not found a suitable loop tail, so do no rotation.  */
399       best_bb = back_edge->src;
400     }
401   best_bb->aux = NULL;
402   return best_bb;
403 }
404 
/* Mark BB as visited in trace number TRACE.  */
406 
407 static void
mark_bb_visited (basic_block bb, int trace)
409 {
410   bb->il.rtl->visited = trace;
411   if (bbd[bb->index].heap)
412     {
413       fibheap_delete_node (bbd[bb->index].heap, bbd[bb->index].node);
414       bbd[bb->index].heap = NULL;
415       bbd[bb->index].node = NULL;
416     }
417 }
418 
/* One round of finding traces.  Find traces for BRANCH_TH and EXEC_TH, i.e.
   do not include into traces basic blocks whose probability is lower than
   BRANCH_TH, whose frequency is lower than EXEC_TH or whose count is lower
   than COUNT_TH.  Store the new traces into TRACES and modify the number of
   traces *N_TRACES.  Set the round (which the trace belongs to) to ROUND.
   The function expects the starting basic blocks to be in *HEAP; at the end
   it deletes *HEAP and stores starting points for the next round into new
   *HEAP.  */
426 
427 static void
find_traces_1_round (int branch_th, int exec_th, gcov_type count_th,
429 		     struct trace *traces, int *n_traces, int round,
430 		     fibheap_t *heap, int number_of_rounds)
431 {
432   /* Heap for discarded basic blocks which are possible starting points for
433      the next round.  */
434   fibheap_t new_heap = fibheap_new ();
435 
436   while (!fibheap_empty (*heap))
437     {
438       basic_block bb;
439       struct trace *trace;
440       edge best_edge, e;
441       fibheapkey_t key;
442       edge_iterator ei;
443 
444       bb = fibheap_extract_min (*heap);
445       bbd[bb->index].heap = NULL;
446       bbd[bb->index].node = NULL;
447 
448       if (dump_file)
449 	fprintf (dump_file, "Getting bb %d\n", bb->index);
450 
451       /* If the BB's frequency is too low send BB to the next round.  When
452          partitioning hot/cold blocks into separate sections, make sure all
453          the cold blocks (and ONLY the cold blocks) go into the (extra) final
454          round.  */
455 
456       if (push_to_next_round_p (bb, round, number_of_rounds, exec_th,
457 				count_th))
458 	{
459 	  int key = bb_to_key (bb);
460 	  bbd[bb->index].heap = new_heap;
461 	  bbd[bb->index].node = fibheap_insert (new_heap, key, bb);
462 
463 	  if (dump_file)
464 	    fprintf (dump_file,
465 		     "  Possible start point of next round: %d (key: %d)\n",
466 		     bb->index, key);
467 	  continue;
468 	}
469 
470       trace = traces + *n_traces;
471       trace->first = bb;
472       trace->round = round;
473       trace->length = 0;
474       bbd[bb->index].in_trace = *n_traces;
475       (*n_traces)++;
476 
477       do
478 	{
479 	  int prob, freq;
480 	  bool ends_in_call;
481 
482 	  /* The probability and frequency of the best edge.  */
483 	  int best_prob = INT_MIN / 2;
484 	  int best_freq = INT_MIN / 2;
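	  /* (INT_MIN / 2 rather than INT_MIN presumably leaves headroom for
	     the +/- 10% slack arithmetic in better_edge_p without risking
	     signed overflow, while still losing to any real edge.)  */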
485 
486 	  best_edge = NULL;
487 	  mark_bb_visited (bb, *n_traces);
488 	  trace->length++;
489 
490 	  if (dump_file)
491 	    fprintf (dump_file, "Basic block %d was visited in trace %d\n",
492 		     bb->index, *n_traces - 1);
493 
494           ends_in_call = block_ends_with_call_p (bb);
495 
496 	  /* Select the successor that will be placed after BB.  */
497 	  FOR_EACH_EDGE (e, ei, bb->succs)
498 	    {
499 	      gcc_assert (!(e->flags & EDGE_FAKE));
500 
501 	      if (e->dest == EXIT_BLOCK_PTR)
502 		continue;
503 
504 	      if (e->dest->il.rtl->visited
505 		  && e->dest->il.rtl->visited != *n_traces)
506 		continue;
507 
508 	      if (BB_PARTITION (e->dest) != BB_PARTITION (bb))
509 		continue;
510 
511 	      prob = e->probability;
512 	      freq = e->dest->frequency;
513 
514 	      /* The only sensible preference for a call instruction is the
515 		 fallthru edge.  Don't bother selecting anything else.  */
516 	      if (ends_in_call)
517 		{
518 		  if (e->flags & EDGE_CAN_FALLTHRU)
519 		    {
520 		      best_edge = e;
521 		      best_prob = prob;
522 		      best_freq = freq;
523 		    }
524 		  continue;
525 		}
526 
	      /* Skip an edge that cannot be a fallthru edge, or whose
		 successor is improbable or infrequent (i.e. the successor
		 is unsuitable).  */
529 	      if (!(e->flags & EDGE_CAN_FALLTHRU) || (e->flags & EDGE_COMPLEX)
530 		  || prob < branch_th || EDGE_FREQUENCY (e) < exec_th
531 		  || e->count < count_th)
532 		continue;
533 
534 	      /* If partitioning hot/cold basic blocks, don't consider edges
535 		 that cross section boundaries.  */
536 
537 	      if (better_edge_p (bb, e, prob, freq, best_prob, best_freq,
538 				 best_edge))
539 		{
540 		  best_edge = e;
541 		  best_prob = prob;
542 		  best_freq = freq;
543 		}
544 	    }
545 
	  /* If the best destination has multiple predecessors and can be
	     duplicated more cheaply than a jump, don't allow it to be added
	     to a trace.  We'll duplicate it when connecting the traces.  */
549 	  if (best_edge && EDGE_COUNT (best_edge->dest->preds) >= 2
550 	      && copy_bb_p (best_edge->dest, 0))
551 	    best_edge = NULL;
552 
553 	  /* Add all non-selected successors to the heaps.  */
554 	  FOR_EACH_EDGE (e, ei, bb->succs)
555 	    {
556 	      if (e == best_edge
557 		  || e->dest == EXIT_BLOCK_PTR
558 		  || e->dest->il.rtl->visited)
559 		continue;
560 
561 	      key = bb_to_key (e->dest);
562 
563 	      if (bbd[e->dest->index].heap)
564 		{
565 		  /* E->DEST is already in some heap.  */
566 		  if (key != bbd[e->dest->index].node->key)
567 		    {
568 		      if (dump_file)
569 			{
570 			  fprintf (dump_file,
571 				   "Changing key for bb %d from %ld to %ld.\n",
572 				   e->dest->index,
573 				   (long) bbd[e->dest->index].node->key,
574 				   key);
575 			}
576 		      fibheap_replace_key (bbd[e->dest->index].heap,
577 					   bbd[e->dest->index].node, key);
578 		    }
579 		}
580 	      else
581 		{
582 		  fibheap_t which_heap = *heap;
583 
584 		  prob = e->probability;
585 		  freq = EDGE_FREQUENCY (e);
586 
587 		  if (!(e->flags & EDGE_CAN_FALLTHRU)
588 		      || (e->flags & EDGE_COMPLEX)
589 		      || prob < branch_th || freq < exec_th
590 		      || e->count < count_th)
591 		    {
592 		      /* When partitioning hot/cold basic blocks, make sure
593 			 the cold blocks (and only the cold blocks) all get
594 			 pushed to the last round of trace collection.  */
595 
596 		      if (push_to_next_round_p (e->dest, round,
597 						number_of_rounds,
598 						exec_th, count_th))
599 			which_heap = new_heap;
600 		    }
601 
602 		  bbd[e->dest->index].heap = which_heap;
603 		  bbd[e->dest->index].node = fibheap_insert (which_heap,
604 								key, e->dest);
605 
606 		  if (dump_file)
607 		    {
608 		      fprintf (dump_file,
609 			       "  Possible start of %s round: %d (key: %ld)\n",
610 			       (which_heap == new_heap) ? "next" : "this",
611 			       e->dest->index, (long) key);
612 		    }
613 
614 		}
615 	    }
616 
617 	  if (best_edge) /* Suitable successor was found.  */
618 	    {
619 	      if (best_edge->dest->il.rtl->visited == *n_traces)
620 		{
621 		  /* We do nothing with one basic block loops.  */
622 		  if (best_edge->dest != bb)
623 		    {
624 		      if (EDGE_FREQUENCY (best_edge)
625 			  > 4 * best_edge->dest->frequency / 5)
626 			{
627 			  /* The loop has at least 4 iterations.  If the loop
628 			     header is not the first block of the function
629 			     we can rotate the loop.  */
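			  /* (Here EDGE_FREQUENCY (best_edge) exceeding 4/5 of
			     the header's frequency means the back edge carries
			     most of the header's executions, i.e. the loop
			     iterates several times per entry.)  */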
630 
631 			  if (best_edge->dest != ENTRY_BLOCK_PTR->next_bb)
632 			    {
633 			      if (dump_file)
634 				{
635 				  fprintf (dump_file,
636 					   "Rotating loop %d - %d\n",
637 					   best_edge->dest->index, bb->index);
638 				}
639 			      bb->aux = best_edge->dest;
640 			      bbd[best_edge->dest->index].in_trace =
641 				                             (*n_traces) - 1;
642 			      bb = rotate_loop (best_edge, trace, *n_traces);
643 			    }
644 			}
645 		      else
646 			{
647 			  /* The loop has less than 4 iterations.  */
648 
649 			  if (single_succ_p (bb)
650 			      && copy_bb_p (best_edge->dest, !optimize_size))
651 			    {
652 			      bb = copy_bb (best_edge->dest, best_edge, bb,
653 					    *n_traces);
654 			      trace->length++;
655 			    }
656 			}
657 		    }
658 
659 		  /* Terminate the trace.  */
660 		  break;
661 		}
662 	      else
663 		{
664 		  /* Check for a situation
665 
666 		    A
667 		   /|
668 		  B |
669 		   \|
670 		    C
671 
672 		  where
673 		  EDGE_FREQUENCY (AB) + EDGE_FREQUENCY (BC)
674 		    >= EDGE_FREQUENCY (AC).
675 		  (i.e. 2 * B->frequency >= EDGE_FREQUENCY (AC) )
676 		  Best ordering is then A B C.
677 
678 		  This situation is created for example by:
679 
680 		  if (A) B;
681 		  C;
682 
683 		  */
684 
685 		  FOR_EACH_EDGE (e, ei, bb->succs)
686 		    if (e != best_edge
687 			&& (e->flags & EDGE_CAN_FALLTHRU)
688 			&& !(e->flags & EDGE_COMPLEX)
689 			&& !e->dest->il.rtl->visited
690 			&& single_pred_p (e->dest)
691 			&& !(e->flags & EDGE_CROSSING)
692 			&& single_succ_p (e->dest)
693 			&& (single_succ_edge (e->dest)->flags
694 			    & EDGE_CAN_FALLTHRU)
695 			&& !(single_succ_edge (e->dest)->flags & EDGE_COMPLEX)
696 			&& single_succ (e->dest) == best_edge->dest
697 			&& 2 * e->dest->frequency >= EDGE_FREQUENCY (best_edge))
698 		      {
699 			best_edge = e;
700 			if (dump_file)
701 			  fprintf (dump_file, "Selecting BB %d\n",
702 				   best_edge->dest->index);
703 			break;
704 		      }
705 
706 		  bb->aux = best_edge->dest;
707 		  bbd[best_edge->dest->index].in_trace = (*n_traces) - 1;
708 		  bb = best_edge->dest;
709 		}
710 	    }
711 	}
712       while (best_edge);
713       trace->last = bb;
714       bbd[trace->first->index].start_of_trace = *n_traces - 1;
715       bbd[trace->last->index].end_of_trace = *n_traces - 1;
716 
      /* The trace is terminated so we have to recount the keys in the heap
	 (some blocks can now have lower keys because one of their
	 predecessors has become the end of a trace).  */
720       FOR_EACH_EDGE (e, ei, bb->succs)
721 	{
722 	  if (e->dest == EXIT_BLOCK_PTR
723 	      || e->dest->il.rtl->visited)
724 	    continue;
725 
726 	  if (bbd[e->dest->index].heap)
727 	    {
728 	      key = bb_to_key (e->dest);
729 	      if (key != bbd[e->dest->index].node->key)
730 		{
731 		  if (dump_file)
732 		    {
733 		      fprintf (dump_file,
734 			       "Changing key for bb %d from %ld to %ld.\n",
735 			       e->dest->index,
736 			       (long) bbd[e->dest->index].node->key, key);
737 		    }
738 		  fibheap_replace_key (bbd[e->dest->index].heap,
739 				       bbd[e->dest->index].node,
740 				       key);
741 		}
742 	    }
743 	}
744     }
745 
746   fibheap_delete (*heap);
747 
748   /* "Return" the new heap.  */
749   *heap = new_heap;
750 }
751 
/* Create a duplicate of the basic block OLD_BB and redirect edge E to it, add
   it to the trace after BB, mark the duplicate as visited and update the
   pass's data structures (TRACE is the number of the trace into which OLD_BB
   is duplicated).  */
755 
756 static basic_block
copy_bb (basic_block old_bb, edge e, basic_block bb, int trace)
758 {
759   basic_block new_bb;
760 
761   new_bb = duplicate_block (old_bb, e, bb);
762   BB_COPY_PARTITION (new_bb, old_bb);
763 
764   gcc_assert (e->dest == new_bb);
765   gcc_assert (!e->dest->il.rtl->visited);
766 
767   if (dump_file)
768     fprintf (dump_file,
769 	     "Duplicated bb %d (created bb %d)\n",
770 	     old_bb->index, new_bb->index);
771   new_bb->il.rtl->visited = trace;
772   new_bb->aux = bb->aux;
773   bb->aux = new_bb;
774 
775   if (new_bb->index >= array_size || last_basic_block > array_size)
776     {
777       int i;
778       int new_size;
779 
780       new_size = MAX (last_basic_block, new_bb->index + 1);
781       new_size = GET_ARRAY_SIZE (new_size);
782       bbd = xrealloc (bbd, new_size * sizeof (bbro_basic_block_data));
783       for (i = array_size; i < new_size; i++)
784 	{
785 	  bbd[i].start_of_trace = -1;
786 	  bbd[i].in_trace = -1;
787 	  bbd[i].end_of_trace = -1;
788 	  bbd[i].heap = NULL;
789 	  bbd[i].node = NULL;
790 	}
791       array_size = new_size;
792 
793       if (dump_file)
794 	{
795 	  fprintf (dump_file,
796 		   "Growing the dynamic array to %d elements.\n",
797 		   array_size);
798 	}
799     }
800 
801   bbd[new_bb->index].in_trace = trace;
802 
803   return new_bb;
804 }
805 
806 /* Compute and return the key (for the heap) of the basic block BB.  */
807 
808 static fibheapkey_t
bb_to_key (basic_block bb)
810 {
811   edge e;
812   edge_iterator ei;
813   int priority = 0;
814 
815   /* Do not start in probably never executed blocks.  */
816 
817   if (BB_PARTITION (bb) == BB_COLD_PARTITION
818       || probably_never_executed_bb_p (bb))
819     return BB_FREQ_MAX;
820 
821   /* Prefer blocks whose predecessor is an end of some trace
822      or whose predecessor edge is EDGE_DFS_BACK.  */
823   FOR_EACH_EDGE (e, ei, bb->preds)
824     {
825       if ((e->src != ENTRY_BLOCK_PTR && bbd[e->src->index].end_of_trace >= 0)
826 	  || (e->flags & EDGE_DFS_BACK))
827 	{
828 	  int edge_freq = EDGE_FREQUENCY (e);
829 
830 	  if (edge_freq > priority)
831 	    priority = edge_freq;
832 	}
833     }
834 
835   if (priority)
836     /* The block with priority should have significantly lower key.  */
837     return -(100 * BB_FREQ_MAX + 100 * priority + bb->frequency);
838   return -bb->frequency;
839 }
840 
/* Return true when the edge E from basic block BB is better than the temporary
   best edge (details are in the function).  The probability of edge E is PROB.
   The frequency of the successor is FREQ.  The current best probability is
   BEST_PROB, the best frequency is BEST_FREQ.
   The edge is considered equivalent to the best edge when PROB does not differ
   much from BEST_PROB; similarly for frequency.  */
847 
848 static bool
better_edge_p (basic_block bb, edge e, int prob, int freq, int best_prob,
850 	       int best_freq, edge cur_best_edge)
851 {
852   bool is_better_edge;
853 
854   /* The BEST_* values do not have to be best, but can be a bit smaller than
855      maximum values.  */
856   int diff_prob = best_prob / 10;
857   int diff_freq = best_freq / 10;
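  /* For example, with best_prob == 8000 out of a REG_BR_PROB_BASE of 10000,
     any edge whose probability is within 800 of it is treated as having an
     equivalent probability, and the decision falls through to the frequency
     and layout tie-breakers below.  */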
858 
859   if (prob > best_prob + diff_prob)
860     /* The edge has higher probability than the temporary best edge.  */
861     is_better_edge = true;
862   else if (prob < best_prob - diff_prob)
863     /* The edge has lower probability than the temporary best edge.  */
864     is_better_edge = false;
865   else if (freq < best_freq - diff_freq)
866     /* The edge and the temporary best edge  have almost equivalent
867        probabilities.  The higher frequency of a successor now means
868        that there is another edge going into that successor.
869        This successor has lower frequency so it is better.  */
870     is_better_edge = true;
871   else if (freq > best_freq + diff_freq)
872     /* This successor has higher frequency so it is worse.  */
873     is_better_edge = false;
874   else if (e->dest->prev_bb == bb)
875     /* The edges have equivalent probabilities and the successors
876        have equivalent frequencies.  Select the previous successor.  */
877     is_better_edge = true;
878   else
879     is_better_edge = false;
880 
881   /* If we are doing hot/cold partitioning, make sure that we always favor
882      non-crossing edges over crossing edges.  */
883 
884   if (!is_better_edge
885       && flag_reorder_blocks_and_partition
886       && cur_best_edge
887       && (cur_best_edge->flags & EDGE_CROSSING)
888       && !(e->flags & EDGE_CROSSING))
889     is_better_edge = true;
890 
891   return is_better_edge;
892 }
893 
894 /* Connect traces in array TRACES, N_TRACES is the count of traces.  */
895 
896 static void
connect_traces (int n_traces, struct trace *traces)
898 {
899   int i;
900   bool *connected;
901   bool two_passes;
902   int last_trace;
903   int current_pass;
904   int current_partition;
905   int freq_threshold;
906   gcov_type count_threshold;
907 
908   freq_threshold = max_entry_frequency * DUPLICATION_THRESHOLD / 1000;
909   if (max_entry_count < INT_MAX / 1000)
910     count_threshold = max_entry_count * DUPLICATION_THRESHOLD / 1000;
911   else
912     count_threshold = max_entry_count / 1000 * DUPLICATION_THRESHOLD;
913 
914   connected = xcalloc (n_traces, sizeof (bool));
915   last_trace = -1;
916   current_pass = 1;
917   current_partition = BB_PARTITION (traces[0].first);
918   two_passes = false;
919 
920   if (flag_reorder_blocks_and_partition)
921     for (i = 0; i < n_traces && !two_passes; i++)
922       if (BB_PARTITION (traces[0].first)
923 	  != BB_PARTITION (traces[i].first))
924 	two_passes = true;
925 
926   for (i = 0; i < n_traces || (two_passes && current_pass == 1) ; i++)
927     {
928       int t = i;
929       int t2;
930       edge e, best;
931       int best_len;
932 
933       if (i >= n_traces)
934 	{
935 	  gcc_assert (two_passes && current_pass == 1);
936 	  i = 0;
937 	  t = i;
938 	  current_pass = 2;
939 	  if (current_partition == BB_HOT_PARTITION)
940 	    current_partition = BB_COLD_PARTITION;
941 	  else
942 	    current_partition = BB_HOT_PARTITION;
943 	}
944 
945       if (connected[t])
946 	continue;
947 
948       if (two_passes
949 	  && BB_PARTITION (traces[t].first) != current_partition)
950 	continue;
951 
952       connected[t] = true;
953 
954       /* Find the predecessor traces.  */
955       for (t2 = t; t2 > 0;)
956 	{
957 	  edge_iterator ei;
958 	  best = NULL;
959 	  best_len = 0;
960 	  FOR_EACH_EDGE (e, ei, traces[t2].first->preds)
961 	    {
962 	      int si = e->src->index;
963 
964 	      if (e->src != ENTRY_BLOCK_PTR
965 		  && (e->flags & EDGE_CAN_FALLTHRU)
966 		  && !(e->flags & EDGE_COMPLEX)
967 		  && bbd[si].end_of_trace >= 0
968 		  && !connected[bbd[si].end_of_trace]
969 		  && (BB_PARTITION (e->src) == current_partition)
970 		  && (!best
971 		      || e->probability > best->probability
972 		      || (e->probability == best->probability
973 			  && traces[bbd[si].end_of_trace].length > best_len)))
974 		{
975 		  best = e;
976 		  best_len = traces[bbd[si].end_of_trace].length;
977 		}
978 	    }
979 	  if (best)
980 	    {
981 	      best->src->aux = best->dest;
982 	      t2 = bbd[best->src->index].end_of_trace;
983 	      connected[t2] = true;
984 
985 	      if (dump_file)
986 		{
987 		  fprintf (dump_file, "Connection: %d %d\n",
988 			   best->src->index, best->dest->index);
989 		}
990 	    }
991 	  else
992 	    break;
993 	}
994 
995       if (last_trace >= 0)
996 	traces[last_trace].last->aux = traces[t2].first;
997       last_trace = t;
998 
999       /* Find the successor traces.  */
1000       while (1)
1001 	{
1002 	  /* Find the continuation of the chain.  */
1003 	  edge_iterator ei;
1004 	  best = NULL;
1005 	  best_len = 0;
1006 	  FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1007 	    {
1008 	      int di = e->dest->index;
1009 
1010 	      if (e->dest != EXIT_BLOCK_PTR
1011 		  && (e->flags & EDGE_CAN_FALLTHRU)
1012 		  && !(e->flags & EDGE_COMPLEX)
1013 		  && bbd[di].start_of_trace >= 0
1014 		  && !connected[bbd[di].start_of_trace]
1015 		  && (BB_PARTITION (e->dest) == current_partition)
1016 		  && (!best
1017 		      || e->probability > best->probability
1018 		      || (e->probability == best->probability
1019 			  && traces[bbd[di].start_of_trace].length > best_len)))
1020 		{
1021 		  best = e;
1022 		  best_len = traces[bbd[di].start_of_trace].length;
1023 		}
1024 	    }
1025 
1026 	  if (best)
1027 	    {
1028 	      if (dump_file)
1029 		{
1030 		  fprintf (dump_file, "Connection: %d %d\n",
1031 			   best->src->index, best->dest->index);
1032 		}
1033 	      t = bbd[best->dest->index].start_of_trace;
1034 	      traces[last_trace].last->aux = traces[t].first;
1035 	      connected[t] = true;
1036 	      last_trace = t;
1037 	    }
1038 	  else
1039 	    {
1040 	      /* Try to connect the traces by duplication of 1 block.  */
1041 	      edge e2;
1042 	      basic_block next_bb = NULL;
1043 	      bool try_copy = false;
1044 
1045 	      FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1046 		if (e->dest != EXIT_BLOCK_PTR
1047 		    && (e->flags & EDGE_CAN_FALLTHRU)
1048 		    && !(e->flags & EDGE_COMPLEX)
1049 		    && (!best || e->probability > best->probability))
1050 		  {
1051 		    edge_iterator ei;
1052 		    edge best2 = NULL;
1053 		    int best2_len = 0;
1054 
1055 		    /* If the destination is a start of a trace which is only
1056 		       one block long, then no need to search the successor
1057 		       blocks of the trace.  Accept it.  */
1058 		    if (bbd[e->dest->index].start_of_trace >= 0
1059 			&& traces[bbd[e->dest->index].start_of_trace].length
1060 			   == 1)
1061 		      {
1062 			best = e;
1063 			try_copy = true;
1064 			continue;
1065 		      }
1066 
1067 		    FOR_EACH_EDGE (e2, ei, e->dest->succs)
1068 		      {
1069 			int di = e2->dest->index;
1070 
1071 			if (e2->dest == EXIT_BLOCK_PTR
1072 			    || ((e2->flags & EDGE_CAN_FALLTHRU)
1073 				&& !(e2->flags & EDGE_COMPLEX)
1074 				&& bbd[di].start_of_trace >= 0
1075 				&& !connected[bbd[di].start_of_trace]
1076 				&& (BB_PARTITION (e2->dest) == current_partition)
1077 				&& (EDGE_FREQUENCY (e2) >= freq_threshold)
1078 				&& (e2->count >= count_threshold)
1079 				&& (!best2
1080 				    || e2->probability > best2->probability
1081 				    || (e2->probability == best2->probability
1082 					&& traces[bbd[di].start_of_trace].length
1083 					   > best2_len))))
1084 			  {
1085 			    best = e;
1086 			    best2 = e2;
1087 			    if (e2->dest != EXIT_BLOCK_PTR)
1088 			      best2_len = traces[bbd[di].start_of_trace].length;
1089 			    else
1090 			      best2_len = INT_MAX;
1091 			    next_bb = e2->dest;
1092 			    try_copy = true;
1093 			  }
1094 		      }
1095 		  }
1096 
1097 	      if (flag_reorder_blocks_and_partition)
1098 		try_copy = false;
1099 
1100 	      /* Copy tiny blocks always; copy larger blocks only when the
1101 		 edge is traversed frequently enough.  */
1102 	      if (try_copy
1103 		  && copy_bb_p (best->dest,
1104 				!optimize_size
1105 				&& EDGE_FREQUENCY (best) >= freq_threshold
1106 				&& best->count >= count_threshold))
1107 		{
1108 		  basic_block new_bb;
1109 
1110 		  if (dump_file)
1111 		    {
1112 		      fprintf (dump_file, "Connection: %d %d ",
1113 			       traces[t].last->index, best->dest->index);
1114 		      if (!next_bb)
1115 			fputc ('\n', dump_file);
1116 		      else if (next_bb == EXIT_BLOCK_PTR)
1117 			fprintf (dump_file, "exit\n");
1118 		      else
1119 			fprintf (dump_file, "%d\n", next_bb->index);
1120 		    }
1121 
1122 		  new_bb = copy_bb (best->dest, best, traces[t].last, t);
1123 		  traces[t].last = new_bb;
1124 		  if (next_bb && next_bb != EXIT_BLOCK_PTR)
1125 		    {
1126 		      t = bbd[next_bb->index].start_of_trace;
1127 		      traces[last_trace].last->aux = traces[t].first;
1128 		      connected[t] = true;
1129 		      last_trace = t;
1130 		    }
1131 		  else
1132 		    break;	/* Stop finding the successor traces.  */
1133 		}
1134 	      else
1135 		break;	/* Stop finding the successor traces.  */
1136 	    }
1137 	}
1138     }
1139 
1140   if (dump_file)
1141     {
1142       basic_block bb;
1143 
1144       fprintf (dump_file, "Final order:\n");
1145       for (bb = traces[0].first; bb; bb = bb->aux)
1146 	fprintf (dump_file, "%d ", bb->index);
1147       fprintf (dump_file, "\n");
1148       fflush (dump_file);
1149     }
1150 
1151   FREE (connected);
1152 }
1153 
1154 /* Return true when BB can and should be copied. CODE_MAY_GROW is true
1155    when code size is allowed to grow by duplication.  */
1156 
1157 static bool
copy_bb_p (basic_block bb, int code_may_grow)
1159 {
1160   int size = 0;
1161   int max_size = uncond_jump_length;
1162   rtx insn;
1163 
1164   if (!bb->frequency)
1165     return false;
1166   if (EDGE_COUNT (bb->preds) < 2)
1167     return false;
1168   if (!can_duplicate_block_p (bb))
1169     return false;
1170 
1171   /* Avoid duplicating blocks which have many successors (PR/13430).  */
1172   if (EDGE_COUNT (bb->succs) > 8)
1173     return false;
1174 
1175   if (code_may_grow && maybe_hot_bb_p (bb))
1176     max_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);
1177 
1178   FOR_BB_INSNS (bb, insn)
1179     {
1180       if (INSN_P (insn))
1181 	size += get_attr_min_length (insn);
1182     }
1183 
1184   if (size <= max_size)
1185     return true;
1186 
1187   if (dump_file)
1188     {
1189       fprintf (dump_file,
1190 	       "Block %d can't be copied because its size = %d.\n",
1191 	       bb->index, size);
1192     }
1193 
1194   return false;
1195 }
1196 
1197 /* Return the length of unconditional jump instruction.  */
1198 
1199 static int
get_uncond_jump_length (void)
1201 {
1202   rtx label, jump;
1203   int length;
1204 
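  /* Emit a temporary label and an unconditional jump to it, query the
     jump's minimum length, and then delete both again; only the measured
     length is kept.  */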
1205   label = emit_label_before (gen_label_rtx (), get_insns ());
1206   jump = emit_jump_insn (gen_jump (label));
1207 
1208   length = get_attr_min_length (jump);
1209 
1210   delete_insn (jump);
1211   delete_insn (label);
1212   return length;
1213 }
1214 
1215 /* Find the basic blocks that are rarely executed and need to be moved to
1216    a separate section of the .o file (to cut down on paging and improve
1217    cache locality).  */
1218 
1219 static void
find_rarely_executed_basic_blocks_and_crossing_edges (edge *crossing_edges,
1221 						      int *n_crossing_edges,
1222 						      int *max_idx)
1223 {
1224   basic_block bb;
1225   bool has_hot_blocks = false;
1226   edge e;
1227   int i;
1228   edge_iterator ei;
1229 
1230   /* Mark which partition (hot/cold) each basic block belongs in.  */
1231 
1232   FOR_EACH_BB (bb)
1233     {
1234       if (probably_never_executed_bb_p (bb))
1235 	BB_SET_PARTITION (bb, BB_COLD_PARTITION);
1236       else
1237 	{
1238 	  BB_SET_PARTITION (bb, BB_HOT_PARTITION);
1239 	  has_hot_blocks = true;
1240 	}
1241     }
1242 
1243   /* Mark every edge that crosses between sections.  */
1244 
1245   i = 0;
1246   FOR_EACH_BB (bb)
1247     FOR_EACH_EDGE (e, ei, bb->succs)
1248     {
1249       if (e->src != ENTRY_BLOCK_PTR
1250 	  && e->dest != EXIT_BLOCK_PTR
1251 	  && BB_PARTITION (e->src) != BB_PARTITION (e->dest))
1252 	{
1253 	  e->flags |= EDGE_CROSSING;
1254 	  if (i == *max_idx)
1255 	    {
1256 	      *max_idx *= 2;
1257 	      crossing_edges = xrealloc (crossing_edges,
1258 					 (*max_idx) * sizeof (edge));
1259 	    }
1260 	  crossing_edges[i++] = e;
1261 	}
1262       else
1263 	e->flags &= ~EDGE_CROSSING;
1264     }
1265   *n_crossing_edges = i;
1266 }
1267 
/* If any destination of a crossing edge does not have a label, add a label;
   convert any fall-through crossing edges (for blocks that do not contain
   a jump) to unconditional jumps.  */
1271 
1272 static void
add_labels_and_missing_jumps (edge *crossing_edges, int n_crossing_edges)
1274 {
1275   int i;
1276   basic_block src;
1277   basic_block dest;
1278   rtx label;
1279   rtx barrier;
1280   rtx new_jump;
1281 
1282   for (i=0; i < n_crossing_edges; i++)
1283     {
1284       if (crossing_edges[i])
1285   	{
1286   	  src = crossing_edges[i]->src;
1287   	  dest = crossing_edges[i]->dest;
1288 
1289   	  /* Make sure dest has a label.  */
1290 
1291   	  if (dest && (dest != EXIT_BLOCK_PTR))
1292   	    {
1293 	      label = block_label (dest);
1294 
1295  	      /* Make sure source block ends with a jump.  */
1296 
1297  	      if (src && (src != ENTRY_BLOCK_PTR))
1298  		{
1299 		  if (!JUMP_P (BB_END (src)))
1300  		    /* bb just falls through.  */
1301  		    {
1302  		      /* make sure there's only one successor */
1303 		      gcc_assert (single_succ_p (src));
1304 
1305 		      /* Find label in dest block.  */
1306 		      label = block_label (dest);
1307 
1308 		      new_jump = emit_jump_insn_after (gen_jump (label),
1309 						       BB_END (src));
1310 		      barrier = emit_barrier_after (new_jump);
1311 		      JUMP_LABEL (new_jump) = label;
1312 		      LABEL_NUSES (label) += 1;
1313 		      src->il.rtl->footer = unlink_insn_chain (barrier, barrier);
1314 		      /* Mark edge as non-fallthru.  */
1315 		      crossing_edges[i]->flags &= ~EDGE_FALLTHRU;
1316  		    } /* end: 'if (GET_CODE ... '  */
1317  		} /* end: 'if (src && src->index...'  */
1318   	    } /* end: 'if (dest && dest->index...'  */
1319   	} /* end: 'if (crossing_edges[i]...'  */
1320     } /* end for loop  */
1321 }
1322 
1323 /* Find any bb's where the fall-through edge is a crossing edge (note that
1324    these bb's must also contain a conditional jump; we've already
1325    dealt with fall-through edges for blocks that didn't have a
1326    conditional jump in the call to add_labels_and_missing_jumps).
1327    Convert the fall-through edge to non-crossing edge by inserting a
1328    new bb to fall-through into.  The new bb will contain an
1329    unconditional jump (crossing edge) to the original fall through
1330    destination.  */
1331 
1332 static void
fix_up_fall_thru_edges (void)
1334 {
1335   basic_block cur_bb;
1336   basic_block new_bb;
1337   edge succ1;
1338   edge succ2;
1339   edge fall_thru;
1340   edge cond_jump = NULL;
1341   edge e;
1342   bool cond_jump_crosses;
1343   int invert_worked;
1344   rtx old_jump;
1345   rtx fall_thru_label;
1346   rtx barrier;
1347 
1348   FOR_EACH_BB (cur_bb)
1349     {
1350       fall_thru = NULL;
1351       if (EDGE_COUNT (cur_bb->succs) > 0)
1352 	succ1 = EDGE_SUCC (cur_bb, 0);
1353       else
1354 	succ1 = NULL;
1355 
1356       if (EDGE_COUNT (cur_bb->succs) > 1)
1357   	succ2 = EDGE_SUCC (cur_bb, 1);
1358       else
1359   	succ2 = NULL;
1360 
1361       /* Find the fall-through edge.  */
1362 
1363       if (succ1
1364  	  && (succ1->flags & EDGE_FALLTHRU))
1365  	{
1366  	  fall_thru = succ1;
1367  	  cond_jump = succ2;
1368  	}
1369       else if (succ2
1370  	       && (succ2->flags & EDGE_FALLTHRU))
1371  	{
1372  	  fall_thru = succ2;
1373  	  cond_jump = succ1;
1374  	}
1375 
1376       if (fall_thru && (fall_thru->dest != EXIT_BLOCK_PTR))
1377   	{
1378   	  /* Check to see if the fall-thru edge is a crossing edge.  */
1379 
1380 	  if (fall_thru->flags & EDGE_CROSSING)
1381   	    {
1382 	      /* The fall_thru edge crosses; now check the cond jump edge, if
1383 	         it exists.  */
1384 
1385  	      cond_jump_crosses = true;
1386  	      invert_worked  = 0;
1387 	      old_jump = BB_END (cur_bb);
1388 
1389  	      /* Find the jump instruction, if there is one.  */
1390 
1391  	      if (cond_jump)
1392  		{
1393 		  if (!(cond_jump->flags & EDGE_CROSSING))
1394  		    cond_jump_crosses = false;
1395 
1396  		  /* We know the fall-thru edge crosses; if the cond
1397  		     jump edge does NOT cross, and its destination is the
1398 		     next block in the bb order, invert the jump
1399  		     (i.e. fix it so the fall thru does not cross and
1400  		     the cond jump does).  */
1401 
1402 		  if (!cond_jump_crosses
1403 		      && cur_bb->aux == cond_jump->dest)
1404  		    {
1405  		      /* Find label in fall_thru block. We've already added
1406  		         any missing labels, so there must be one.  */
1407 
1408  		      fall_thru_label = block_label (fall_thru->dest);
1409 
1410  		      if (old_jump && fall_thru_label)
1411  			invert_worked = invert_jump (old_jump,
1412  						     fall_thru_label,0);
1413  		      if (invert_worked)
1414  			{
1415  			  fall_thru->flags &= ~EDGE_FALLTHRU;
1416  			  cond_jump->flags |= EDGE_FALLTHRU;
1417  			  update_br_prob_note (cur_bb);
1418  			  e = fall_thru;
1419  			  fall_thru = cond_jump;
1420  			  cond_jump = e;
1421 			  cond_jump->flags |= EDGE_CROSSING;
1422 			  fall_thru->flags &= ~EDGE_CROSSING;
1423  			}
1424  		    }
1425  		}
1426 
1427  	      if (cond_jump_crosses || !invert_worked)
1428  		{
1429  		  /* This is the case where both edges out of the basic
1430  		     block are crossing edges. Here we will fix up the
1431 		     fall through edge. The jump edge will be taken care
1432 		     of later.  */
1433 
1434  		  new_bb = force_nonfallthru (fall_thru);
1435 
1436  		  if (new_bb)
1437  		    {
1438  		      new_bb->aux = cur_bb->aux;
1439  		      cur_bb->aux = new_bb;
1440 
1441  		      /* Make sure new fall-through bb is in same
1442 			 partition as bb it's falling through from.  */
1443 
1444 		      BB_COPY_PARTITION (new_bb, cur_bb);
1445 		      single_succ_edge (new_bb)->flags |= EDGE_CROSSING;
1446  		    }
1447 
1448  		  /* Add barrier after new jump */
1449 
1450  		  if (new_bb)
1451  		    {
1452  		      barrier = emit_barrier_after (BB_END (new_bb));
1453  		      new_bb->il.rtl->footer = unlink_insn_chain (barrier,
1454  							       barrier);
1455  		    }
1456  		  else
1457  		    {
1458  		      barrier = emit_barrier_after (BB_END (cur_bb));
1459  		      cur_bb->il.rtl->footer = unlink_insn_chain (barrier,
1460  							       barrier);
1461  		    }
1462  		}
1463   	    }
1464   	}
1465     }
1466 }
1467 
/* This function checks the destination block of a "crossing jump" to
1469    see if it has any crossing predecessors that begin with a code label
1470    and end with an unconditional jump.  If so, it returns that predecessor
1471    block.  (This is to avoid creating lots of new basic blocks that all
1472    contain unconditional jumps to the same destination).  */
1473 
1474 static basic_block
find_jump_block (basic_block jump_dest)
1476 {
1477   basic_block source_bb = NULL;
1478   edge e;
1479   rtx insn;
1480   edge_iterator ei;
1481 
1482   FOR_EACH_EDGE (e, ei, jump_dest->preds)
1483     if (e->flags & EDGE_CROSSING)
1484       {
1485 	basic_block src = e->src;
1486 
1487 	/* Check each predecessor to see if it has a label, and contains
1488 	   only one executable instruction, which is an unconditional jump.
1489 	   If so, we can use it.  */
1490 
1491 	if (LABEL_P (BB_HEAD (src)))
1492 	  for (insn = BB_HEAD (src);
1493 	       !INSN_P (insn) && insn != NEXT_INSN (BB_END (src));
1494 	       insn = NEXT_INSN (insn))
1495 	    {
1496 	      if (INSN_P (insn)
1497 		  && insn == BB_END (src)
1498 		  && JUMP_P (insn)
1499 		  && !any_condjump_p (insn))
1500 		{
1501 		  source_bb = src;
1502 		  break;
1503 		}
1504 	    }
1505 
1506 	if (source_bb)
1507 	  break;
1508       }
1509 
1510   return source_bb;
1511 }
1512 
/* Find all BB's with conditional jumps that are crossing edges;
   insert a new bb and make the conditional jump branch to the new
   bb instead (make the new bb the same color so the conditional branch
   won't be a 'crossing' edge).  Insert an unconditional jump from the
   new bb to the original destination of the conditional jump.  */
1518 
1519 static void
fix_crossing_conditional_branches (void)
1521 {
1522   basic_block cur_bb;
1523   basic_block new_bb;
1524   basic_block last_bb;
1525   basic_block dest;
1526   basic_block prev_bb;
1527   edge succ1;
1528   edge succ2;
1529   edge crossing_edge;
1530   edge new_edge;
1531   rtx old_jump;
1532   rtx set_src;
1533   rtx old_label = NULL_RTX;
1534   rtx new_label;
1535   rtx new_jump;
1536   rtx barrier;
1537 
  last_bb = EXIT_BLOCK_PTR->prev_bb;
1539 
1540   FOR_EACH_BB (cur_bb)
1541     {
1542       crossing_edge = NULL;
1543       if (EDGE_COUNT (cur_bb->succs) > 0)
1544 	succ1 = EDGE_SUCC (cur_bb, 0);
1545       else
1546 	succ1 = NULL;
1547 
1548       if (EDGE_COUNT (cur_bb->succs) > 1)
1549 	succ2 = EDGE_SUCC (cur_bb, 1);
1550       else
1551 	succ2 = NULL;
1552 
1553       /* We already took care of fall-through edges, so only one successor
1554 	 can be a crossing edge.  */
1555 
1556       if (succ1 && (succ1->flags & EDGE_CROSSING))
1557 	crossing_edge = succ1;
1558       else if (succ2 && (succ2->flags & EDGE_CROSSING))
1559  	crossing_edge = succ2;
1560 
1561       if (crossing_edge)
1562  	{
1563 	  old_jump = BB_END (cur_bb);
1564 
1565 	  /* Check to make sure the jump instruction is a
1566 	     conditional jump.  */
1567 
1568 	  set_src = NULL_RTX;
1569 
1570 	  if (any_condjump_p (old_jump))
1571 	    {
1572 	      if (GET_CODE (PATTERN (old_jump)) == SET)
1573 		set_src = SET_SRC (PATTERN (old_jump));
1574 	      else if (GET_CODE (PATTERN (old_jump)) == PARALLEL)
1575 		{
1576 		  set_src = XVECEXP (PATTERN (old_jump), 0,0);
1577 		  if (GET_CODE (set_src) == SET)
1578 		    set_src = SET_SRC (set_src);
1579 		  else
1580 		    set_src = NULL_RTX;
1581 		}
1582 	    }
1583 
1584 	  if (set_src && (GET_CODE (set_src) == IF_THEN_ELSE))
1585 	    {
1586 	      if (GET_CODE (XEXP (set_src, 1)) == PC)
1587 		old_label = XEXP (set_src, 2);
1588 	      else if (GET_CODE (XEXP (set_src, 2)) == PC)
1589 		old_label = XEXP (set_src, 1);
1590 
1591 	      /* Check to see if new bb for jumping to that dest has
1592 		 already been created; if so, use it; if not, create
1593 		 a new one.  */
1594 
1595 	      new_bb = find_jump_block (crossing_edge->dest);
1596 
1597 	      if (new_bb)
1598 		new_label = block_label (new_bb);
1599 	      else
1600 		{
1601 		  /* Create new basic block to be dest for
1602 		     conditional jump.  */
1603 
1604 		  new_bb = create_basic_block (NULL, NULL, last_bb);
1605 		  new_bb->aux = last_bb->aux;
1606 		  last_bb->aux = new_bb;
1607 		  prev_bb = last_bb;
1608 		  last_bb = new_bb;
1609 
1610 		  /* Update register liveness information.  */
1611 
1612 		  new_bb->il.rtl->global_live_at_start = ALLOC_REG_SET (&reg_obstack);
1613 		  new_bb->il.rtl->global_live_at_end = ALLOC_REG_SET (&reg_obstack);
1614 		  COPY_REG_SET (new_bb->il.rtl->global_live_at_end,
1615 				prev_bb->il.rtl->global_live_at_end);
1616 		  COPY_REG_SET (new_bb->il.rtl->global_live_at_start,
1617 				prev_bb->il.rtl->global_live_at_end);
1618 
1619 		  /* Put appropriate instructions in new bb.  */
1620 
1621 		  new_label = gen_label_rtx ();
1622 		  emit_label_before (new_label, BB_HEAD (new_bb));
1623 		  BB_HEAD (new_bb) = new_label;
1624 
1625 		  if (GET_CODE (old_label) == LABEL_REF)
1626 		    {
1627 		      old_label = JUMP_LABEL (old_jump);
1628 		      new_jump = emit_jump_insn_after (gen_jump
1629 						       (old_label),
1630 						       BB_END (new_bb));
1631 		    }
1632 		  else
1633 		    {
1634 		      gcc_assert (HAVE_return
1635 				  && GET_CODE (old_label) == RETURN);
1636 		      new_jump = emit_jump_insn_after (gen_return (),
1637 						       BB_END (new_bb));
1638 		    }
1639 
1640 		  barrier = emit_barrier_after (new_jump);
1641 		  JUMP_LABEL (new_jump) = old_label;
1642 		  new_bb->il.rtl->footer = unlink_insn_chain (barrier,
1643 							   barrier);
1644 
1645 		  /* Make sure new bb is in same partition as source
1646 		     of conditional branch.  */
1647 		  BB_COPY_PARTITION (new_bb, cur_bb);
1648 		}
1649 
1650 	      /* Make old jump branch to new bb.  */
1651 
1652 	      redirect_jump (old_jump, new_label, 0);
1653 
1654 	      /* Remove crossing_edge as predecessor of 'dest'.  */
1655 
1656 	      dest = crossing_edge->dest;
1657 
1658 	      redirect_edge_succ (crossing_edge, new_bb);
1659 
1660 	      /* Make a new edge from new_bb to old dest; new edge
1661 		 will be a successor for new_bb and a predecessor
1662 		 for 'dest'.  */
1663 
1664 	      if (EDGE_COUNT (new_bb->succs) == 0)
1665 		new_edge = make_edge (new_bb, dest, 0);
1666 	      else
1667 		new_edge = EDGE_SUCC (new_bb, 0);
1668 
1669 	      crossing_edge->flags &= ~EDGE_CROSSING;
1670 	      new_edge->flags |= EDGE_CROSSING;
1671 	    }
1672  	}
1673     }
1674 }
1675 
1676 /* Find any unconditional branches that cross between hot and cold
1677    sections.  Convert them into indirect jumps instead.  */
1678 
1679 static void
fix_crossing_unconditional_branches (void)
1681 {
1682   basic_block cur_bb;
1683   rtx last_insn;
1684   rtx label;
1685   rtx label_addr;
1686   rtx indirect_jump_sequence;
1687   rtx jump_insn = NULL_RTX;
1688   rtx new_reg;
1689   rtx cur_insn;
1690   edge succ;
1691 
1692   FOR_EACH_BB (cur_bb)
1693     {
1694       last_insn = BB_END (cur_bb);
1695 
1696       if (EDGE_COUNT (cur_bb->succs) < 1)
1697 	continue;
1698 
1699       succ = EDGE_SUCC (cur_bb, 0);
1700 
1701       /* Check to see if bb ends in a crossing (unconditional) jump.  At
1702          this point, no crossing jumps should be conditional.  */
1703 
1704       if (JUMP_P (last_insn)
1705 	  && (succ->flags & EDGE_CROSSING))
1706 	{
1707 	  rtx label2, table;
1708 
1709 	  gcc_assert (!any_condjump_p (last_insn));
1710 
1711 	  /* Make sure the jump is not already an indirect or table jump.  */
1712 
1713 	  if (!computed_jump_p (last_insn)
1714 	      && !tablejump_p (last_insn, &label2, &table))
1715 	    {
	      /* We have found a "crossing" unconditional branch.  Now
		 we must convert it to an indirect jump.  First create a
		 reference to the label, to use as the jump target.  */
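	      /* The sequence emitted below is effectively
		   new_reg = &label;
		   goto *new_reg;
		 i.e. the label's address is moved into a fresh pseudo and
		 then jumped through indirectly.  */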
1719 
1720 	      label = JUMP_LABEL (last_insn);
1721 	      label_addr = gen_rtx_LABEL_REF (Pmode, label);
1722 	      LABEL_NUSES (label) += 1;
1723 
1724 	      /* Get a register to use for the indirect jump.  */
1725 
1726 	      new_reg = gen_reg_rtx (Pmode);
1727 
	      /* Generate the indirect jump sequence.  */
1729 
1730 	      start_sequence ();
1731 	      emit_move_insn (new_reg, label_addr);
1732 	      emit_indirect_jump (new_reg);
1733 	      indirect_jump_sequence = get_insns ();
1734 	      end_sequence ();
1735 
1736 	      /* Make sure every instruction in the new jump sequence has
1737 		 its basic block set to be cur_bb.  */
1738 
1739 	      for (cur_insn = indirect_jump_sequence; cur_insn;
1740 		   cur_insn = NEXT_INSN (cur_insn))
1741 		{
1742 		  if (!BARRIER_P (cur_insn))
1743 		    BLOCK_FOR_INSN (cur_insn) = cur_bb;
1744 		  if (JUMP_P (cur_insn))
1745 		    jump_insn = cur_insn;
1746 		}
1747 
1748 	      /* Insert the new (indirect) jump sequence immediately before
1749 		 the unconditional jump, then delete the unconditional jump.  */
1750 
1751 	      emit_insn_before (indirect_jump_sequence, last_insn);
1752 	      delete_insn (last_insn);
1753 
1754 	      /* Make BB_END for cur_bb be the jump instruction (NOT the
1755 		 barrier instruction at the end of the sequence...).  */
1756 
1757 	      BB_END (cur_bb) = jump_insn;
1758 	    }
1759 	}
1760     }
1761 }
1762 
1763 /* Add REG_CROSSING_JUMP note to all crossing jump insns.  */
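
/* These notes give later passes a cheap way to recognize jumps that
   cross between the hot and cold sections, so that they can avoid
   undoing the fixes made above (see the IMPORTANT NOTE further
   below).  */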

static void
add_reg_crossing_jump_notes (void)
{
  basic_block bb;
  edge e;
  edge_iterator ei;

  FOR_EACH_BB (bb)
    FOR_EACH_EDGE (e, ei, bb->succs)
      if ((e->flags & EDGE_CROSSING)
	  && JUMP_P (BB_END (e->src)))
	REG_NOTES (BB_END (e->src)) = gen_rtx_EXPR_LIST (REG_CROSSING_JUMP,
							 NULL_RTX,
							 REG_NOTES (BB_END
								    (e->src)));
}

/* Hot and cold basic blocks are partitioned and put in separate
   sections of the .o file, to reduce paging and improve cache
   performance (hopefully).  This can result in bits of code from the
   same function being widely separated in the .o file.  However this
   is not apparent from the current bb structure.  Therefore we must
   take care to ensure that: 1) there are no fall_thru edges that
   cross between sections; 2) for those architectures which have
   "short" conditional branches, all conditional branches that attempt
   to cross between sections are converted to unconditional branches;
   and 3) for those architectures which have "short" unconditional
   branches, all unconditional branches that attempt to cross between
   sections are converted to indirect jumps.

   The code for fixing up fall_thru edges that cross between hot and
   cold basic blocks does so by creating new basic blocks containing
   unconditional branches to the appropriate label in the "other"
   section.  The new basic block is then put in the same (hot or cold)
   section as the original conditional branch, and the fall_thru edge
   is modified to fall into the new basic block instead.  By adding
   this level of indirection we end up with only unconditional branches
   crossing between hot and cold sections.

   Conditional branches are dealt with by adding a level of indirection.
   A new basic block is added in the same (hot/cold) section as the
   conditional branch, and the conditional branch is retargeted to the
   new basic block.  The new basic block contains an unconditional branch
   to the original target of the conditional branch (in the other section).

   Unconditional branches are dealt with by converting them into
   indirect jumps.  */
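
/* For example, a fall_thru edge that crosses the section boundary,

	hot:  A  ---(fall_thru)--->  cold:  B

   is rewritten (by fix_up_fall_thru_edges) as

	hot:  A  ---(fall_thru)--->  hot:  A'  ---(jump)--->  cold:  B

   where A' is a new block containing only an unconditional jump to B,
   placed in the same section as A.  */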

static void
fix_edges_for_rarely_executed_code (edge *crossing_edges,
				    int n_crossing_edges)
{
  /* Make sure the source of any crossing edge ends in a jump and the
     destination of any crossing edge has a label.  */

  add_labels_and_missing_jumps (crossing_edges, n_crossing_edges);

  /* Convert all crossing fall_thru edges into non-crossing fall_thrus
     to new blocks ending in unconditional jumps (that jump to the
     original fall_thru destinations).  */

  fix_up_fall_thru_edges ();

  /* If the architecture does not have conditional branches that can
     span all of memory, convert crossing conditional branches into
     crossing unconditional branches.  */

  if (!HAS_LONG_COND_BRANCH)
    fix_crossing_conditional_branches ();

  /* If the architecture does not have unconditional branches that
     can span all of memory, convert crossing unconditional branches
     into indirect jumps.  Since adding an indirect jump also adds
     a new register usage, update the register usage information as
     well.  */

  if (!HAS_LONG_UNCOND_BRANCH)
    {
      fix_crossing_unconditional_branches ();
      reg_scan (get_insns (), max_reg_num ());
    }

  add_reg_crossing_jump_notes ();
}

/* Verify, in the basic block chain, that there is at most one switch
   between hot/cold partitions.  This is modeled on
   rtl_verify_flow_info_1, but it cannot go inside that function
   because this condition will not be true until after
   reorder_basic_blocks is called.  */

static void
verify_hot_cold_block_grouping (void)
{
  basic_block bb;
  int err = 0;
  bool switched_sections = false;
  int current_partition = 0;

  FOR_EACH_BB (bb)
    {
      if (!current_partition)
	current_partition = BB_PARTITION (bb);
      if (BB_PARTITION (bb) != current_partition)
	{
	  if (switched_sections)
	    {
	      error ("multiple hot/cold transitions found (bb %i)",
		     bb->index);
	      err = 1;
	    }
	  else
	    {
	      switched_sections = true;
	      current_partition = BB_PARTITION (bb);
	    }
	}
    }

  gcc_assert (!err);
}

/* Reorder basic blocks.  The main entry point to this file.  FLAGS is
   the set of flags to pass to cfg_layout_initialize ().  */

void
reorder_basic_blocks (unsigned int flags)
{
  int n_traces;
  int i;
  struct trace *traces;

  if (n_basic_blocks <= 1)
    return;

  if (targetm.cannot_modify_jumps_p ())
    return;

  cfg_layout_initialize (flags);

  set_edge_can_fallthru_flag ();
  mark_dfs_back_edges ();

  /* We estimate the length of an unconditional jump insn only once,
     since the code for getting the insn length always returns the
     minimal length now.  */
  if (uncond_jump_length == 0)
    uncond_jump_length = get_uncond_jump_length ();

  /* We need to know some information for each basic block.  */
  array_size = GET_ARRAY_SIZE (last_basic_block);
  bbd = xmalloc (array_size * sizeof (bbro_basic_block_data));
  for (i = 0; i < array_size; i++)
    {
      bbd[i].start_of_trace = -1;
      bbd[i].in_trace = -1;
      bbd[i].end_of_trace = -1;
      bbd[i].heap = NULL;
      bbd[i].node = NULL;
    }
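
  /* Build the traces and connect them; together this determines the
     new basic block order, which cfg_layout_finalize below commits to
     the insn stream.  */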
  traces = xmalloc (n_basic_blocks * sizeof (struct trace));
  n_traces = 0;
  find_traces (&n_traces, traces);
  connect_traces (n_traces, traces);
  FREE (traces);
  FREE (bbd);

  if (dump_file)
    dump_flow_info (dump_file);

  cfg_layout_finalize ();
  if (flag_reorder_blocks_and_partition)
    verify_hot_cold_block_grouping ();
}

/* Determine which partition the first basic block in the function
   belongs to, then find the first basic block in the current function
   that belongs to a different section, and insert a
   NOTE_INSN_SWITCH_TEXT_SECTIONS note immediately before it in the
   instruction stream.  When writing out the assembly code,
   encountering this note will make the compiler switch between the
   hot and cold text sections.  */

static void
insert_section_boundary_note (void)
{
  basic_block bb;
  rtx new_note;
  int first_partition = 0;

  if (flag_reorder_blocks_and_partition)
    FOR_EACH_BB (bb)
    {
      if (!first_partition)
	first_partition = BB_PARTITION (bb);
      if (BB_PARTITION (bb) != first_partition)
	{
	  new_note = emit_note_before (NOTE_INSN_SWITCH_TEXT_SECTIONS,
				       BB_HEAD (bb));
	  break;
	}
    }
}

/* Duplicate the blocks containing computed gotos.  This basically unfactors
   computed gotos that were factored early on in the compilation process to
   speed up edge based data flow.  Not unfactoring them again can seriously
   pessimize code with many computed jumps in the source code, such as
   interpreters.  See e.g. PR15242.  */
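
/* For instance, in an interpreter whose opcode handlers were all made
   to jump to a single factored "dispatch" block ending in a computed
   goto, this pass gives each such predecessor its own copy of the
   dispatch block, so every handler again ends in its own indirect
   jump (which typically helps indirect branch prediction).  */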

static bool
gate_duplicate_computed_gotos (void)
{
  return (optimize > 0 && flag_expensive_optimizations && !optimize_size);
}


static void
duplicate_computed_gotos (void)
{
  basic_block bb, new_bb;
  bitmap candidates;
  int max_size;

  if (n_basic_blocks <= 1)
    return;

  if (targetm.cannot_modify_jumps_p ())
    return;

  cfg_layout_initialize (0);

  /* We estimate the length of an uncond jump insn only once, since
     the code for getting the insn length always returns the minimal
     length now.  */
  if (uncond_jump_length == 0)
    uncond_jump_length = get_uncond_jump_length ();
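
  /* Blocks whose estimated length exceeds the size of
     PARAM_MAX_GOTO_DUPLICATION_INSNS unconditional jumps are
     considered too big to duplicate.  */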
  max_size = uncond_jump_length * PARAM_VALUE (PARAM_MAX_GOTO_DUPLICATION_INSNS);
  candidates = BITMAP_ALLOC (NULL);

  /* Look for blocks that end in a computed jump, and see if such blocks
     are suitable for unfactoring.  If a block is a candidate for unfactoring,
     mark it in the candidates bitmap.  */
  FOR_EACH_BB (bb)
    {
      rtx insn;
      edge e;
      edge_iterator ei;
      int size, all_flags;

      /* Build the reorder chain for the original order of blocks.  */
      if (bb->next_bb != EXIT_BLOCK_PTR)
	bb->aux = bb->next_bb;

      /* Obviously the block has to end in a computed jump.  */
      if (!computed_jump_p (BB_END (bb)))
	continue;

      /* Only consider blocks that can be duplicated.  */
      if (find_reg_note (BB_END (bb), REG_CROSSING_JUMP, NULL_RTX)
	  || !can_duplicate_block_p (bb))
	continue;

      /* Make sure that the block is small enough.  */
      size = 0;
      FOR_BB_INSNS (bb, insn)
	if (INSN_P (insn))
	  {
	    size += get_attr_min_length (insn);
	    if (size > max_size)
	      break;
	  }
      if (size > max_size)
	continue;

      /* Final check: there must not be any incoming abnormal edges.  */
      all_flags = 0;
      FOR_EACH_EDGE (e, ei, bb->preds)
	all_flags |= e->flags;
      if (all_flags & EDGE_COMPLEX)
	continue;

      bitmap_set_bit (candidates, bb->index);
    }

  /* Nothing to do if there is no computed jump here.  */
  if (bitmap_empty_p (candidates))
    goto done;

  /* Duplicate computed gotos.  */
  FOR_EACH_BB (bb)
    {
      if (bb->il.rtl->visited)
	continue;

      bb->il.rtl->visited = 1;

      /* BB must have one outgoing edge.  That edge must not lead to
	 the exit block or the next block.
	 The destination must have more than one predecessor.  */
      if (!single_succ_p (bb)
	  || single_succ (bb) == EXIT_BLOCK_PTR
	  || single_succ (bb) == bb->next_bb
	  || single_pred_p (single_succ (bb)))
	continue;

      /* The successor block has to be a duplication candidate.  */
      if (!bitmap_bit_p (candidates, single_succ (bb)->index))
	continue;
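
      /* Copy the candidate block, redirect BB's single outgoing edge
	 to the copy, and splice the copy into the reorder chain right
	 after BB so it is emitted next to its new sole predecessor.  */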
      new_bb = duplicate_block (single_succ (bb), single_succ_edge (bb), bb);
      new_bb->aux = bb->aux;
      bb->aux = new_bb;
      new_bb->il.rtl->visited = 1;
    }

done:
  cfg_layout_finalize ();

  BITMAP_FREE (candidates);
}

struct tree_opt_pass pass_duplicate_computed_gotos =
{
  "compgotos",                          /* name */
  gate_duplicate_computed_gotos,        /* gate */
  duplicate_computed_gotos,             /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_REORDER_BLOCKS,                    /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_dump_func,                       /* todo_flags_finish */
  0                                     /* letter */
};


/* This function is the main 'entrance' for the optimization that
   partitions hot and cold basic blocks into separate sections of the
   .o file (to improve performance and cache locality).  Ideally it
   would be called after all optimizations that rearrange the CFG have
   been called.  However part of this optimization may introduce new
   register usage, so it must be called before register allocation has
   occurred.  This means that this optimization is actually called
   well before the optimization that reorders basic blocks (see
   function above).

   This optimization checks the feedback information to determine
   which basic blocks are hot/cold, and updates flags on the basic
   blocks to indicate which section they belong in.  This information
   is later used for writing out sections in the .o file.  Because hot
   and cold sections can be arbitrarily large (within the bounds of
   memory), far beyond the size of a single function, it is necessary
   to fix up all edges that cross section boundaries, to make sure the
   instructions used can actually span the required distance.  The
   fixes are described below.

   Fall-through edges must be changed into jumps; it is not safe or
   legal to fall through across a section boundary.  Whenever a
   fall-through edge crossing a section boundary is encountered, a new
   basic block is inserted (in the same section as the fall-through
   source), and the fall through edge is redirected to the new basic
   block.  The new basic block contains an unconditional jump to the
   original fall-through target.  (If the unconditional jump is
   insufficient to cross section boundaries, that is dealt with a
   little later, see below).

   In order to deal with architectures that have short conditional
   branches (which cannot span all of memory) we take any conditional
   jump that attempts to cross a section boundary and add a level of
   indirection: it becomes a conditional jump to a new basic block, in
   the same section.  The new basic block contains an unconditional
   jump to the original target, in the other section.

   For those architectures whose unconditional branch is also
   incapable of reaching all of memory, those unconditional jumps are
   converted into indirect jumps, through a register.

   IMPORTANT NOTE: This optimization causes some messy interactions
   with the cfg cleanup optimizations; those optimizations want to
   merge blocks wherever possible, and to collapse indirect jump
   sequences (change "A jumps to B jumps to C" directly into "A jumps
   to C").  Those optimizations can undo the jump fixes that
   partitioning is required to make (see above) to ensure that jumps
   crossing section boundaries can actually cover the required
   distance (on many architectures conditional or unconditional jumps
   are not able to reach all of memory).  Therefore tests have to be
   inserted into each such optimization to make sure that it does not
   undo the fixes necessary to cross partition boundaries.  This would
   be much less of a problem if we could perform this optimization
   later in the compilation, but unfortunately the fact that we may
   need to create indirect jumps (through registers) requires that
   this optimization be performed before register allocation.  */

void
partition_hot_cold_basic_blocks (void)
{
  basic_block cur_bb;
  edge *crossing_edges;
  int n_crossing_edges;
  int max_edges = 2 * last_basic_block;

  if (n_basic_blocks <= 1)
    return;

  crossing_edges = xcalloc (max_edges, sizeof (edge));

  cfg_layout_initialize (0);
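
  /* Record the current block order in the AUX fields so that
     cfg_layout_finalize preserves it (the entry and exit blocks, which
     have negative indices, are skipped); this pass only inserts fix-up
     blocks, it does not reorder the existing ones.  */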
  FOR_EACH_BB (cur_bb)
    if (cur_bb->index >= 0
	&& cur_bb->next_bb->index >= 0)
      cur_bb->aux = cur_bb->next_bb;

  find_rarely_executed_basic_blocks_and_crossing_edges (crossing_edges,
							&n_crossing_edges,
							&max_edges);

  if (n_crossing_edges > 0)
    fix_edges_for_rarely_executed_code (crossing_edges, n_crossing_edges);

  free (crossing_edges);

  cfg_layout_finalize ();
}

static bool
gate_handle_reorder_blocks (void)
{
  return (optimize > 0);
}


/* Reorder basic blocks.  */
static void
rest_of_handle_reorder_blocks (void)
{
  bool changed;
  unsigned int liveness_flags;

  /* Last attempt to optimize CFG, as scheduling, peepholing and insn
     splitting possibly introduced more crossjumping opportunities.  */
  liveness_flags = (!HAVE_conditional_execution ? CLEANUP_UPDATE_LIFE : 0);
  changed = cleanup_cfg (CLEANUP_EXPENSIVE | liveness_flags);

  if (flag_sched2_use_traces && flag_schedule_insns_after_reload)
    {
      timevar_push (TV_TRACER);
      tracer (liveness_flags);
      timevar_pop (TV_TRACER);
    }

  if (flag_reorder_blocks || flag_reorder_blocks_and_partition)
    reorder_basic_blocks (liveness_flags);
  if (flag_reorder_blocks || flag_reorder_blocks_and_partition
      || (flag_sched2_use_traces && flag_schedule_insns_after_reload))
    changed |= cleanup_cfg (CLEANUP_EXPENSIVE | liveness_flags);

  /* On conditional execution targets we cannot update liveness cheaply,
     so we defer the update until after both cleanups.  This may lose
     some cases but should not be terribly bad.  */
  if (changed && HAVE_conditional_execution)
    update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES,
		      PROP_DEATH_NOTES);

  /* Add NOTE_INSN_SWITCH_TEXT_SECTIONS notes.  */
  insert_section_boundary_note ();
}

struct tree_opt_pass pass_reorder_blocks =
{
  "bbro",                               /* name */
  gate_handle_reorder_blocks,           /* gate */
  rest_of_handle_reorder_blocks,        /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_REORDER_BLOCKS,                    /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_dump_func,                       /* todo_flags_finish */
  'B'                                   /* letter */
};

static bool
gate_handle_partition_blocks (void)
{
  /* The optimization to partition hot/cold basic blocks into separate
     sections of the .o file does not work well with linkonce or with
     user defined section attributes.  Don't call it if either case
     arises.  */

  return (flag_reorder_blocks_and_partition
	  && !DECL_ONE_ONLY (current_function_decl)
	  && !user_defined_section_attribute);
}

/* Partition hot and cold basic blocks.  */
static void
rest_of_handle_partition_blocks (void)
{
  no_new_pseudos = 0;
  partition_hot_cold_basic_blocks ();
  allocate_reg_life_data ();
  update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES,
		    PROP_LOG_LINKS | PROP_REG_INFO | PROP_DEATH_NOTES);
  no_new_pseudos = 1;
}

struct tree_opt_pass pass_partition_blocks =
{
  "bbpart",                             /* name */
  gate_handle_partition_blocks,         /* gate */
  rest_of_handle_partition_blocks,      /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_REORDER_BLOCKS,                    /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_dump_func,                       /* todo_flags_finish */
  0                                     /* letter */
};