1 /* Basic block reordering routines for the GNU compiler.
2    Copyright (C) 2000-2014 Free Software Foundation, Inc.
3 
4    This file is part of GCC.
5 
6    GCC is free software; you can redistribute it and/or modify it
7    under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3, or (at your option)
9    any later version.
10 
11    GCC is distributed in the hope that it will be useful, but WITHOUT
12    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
14    License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with GCC; see the file COPYING3.  If not see
18    <http://www.gnu.org/licenses/>.  */
19 
/* This (greedy) algorithm constructs traces in several rounds.
   The construction starts from "seeds".  The seed for the first round
   is the entry point of the function.  When there is more than one seed,
   the one with the lowest key in the heap is selected first (see bb_to_key).
   Then the algorithm repeatedly adds the most probable successor to the end
   of a trace.  Finally it connects the traces.

   There are two parameters: Branch Threshold and Exec Threshold.
   If the probability of an edge to a successor of the current basic block is
   lower than Branch Threshold or its frequency is lower than Exec Threshold,
   then the successor will be a seed in one of the next rounds.
   Each round has these parameters lower than the previous one.
   The last round has to have these parameters set to zero so that the
   remaining blocks are picked up.

   The algorithm selects the most probable successor from all unvisited
   successors and successors that have been added to this trace.
   The other successors (that have not been "sent" to the next round) will be
   other seeds for this round and the secondary traces will start from them.
   If the successor has not been visited in this trace, it is added to the
   trace (however, there is some heuristic for simple branches).
   If the successor has been visited in this trace, a loop has been found.
   If the loop has many iterations, the loop is rotated so that the source
   block of the most probable edge going out of the loop is the last block
   of the trace.
   If the loop has few iterations and there is no edge from the last block of
   the loop going out of the loop, the loop header is duplicated.

   When connecting traces, the algorithm first checks whether there is an edge
   from the last block of a trace to the first block of another trace.
   When there are still some unconnected traces it checks whether there exists
   a basic block BB such that BB is a successor of the last block of a trace
   and BB is a predecessor of the first block of another trace.  In this case,
   BB is duplicated, added at the end of the first trace and the traces are
   connected through it.
   The remaining traces are simply connected, so there will be a jump to the
   beginning of each remaining trace.

   The above description is for the full algorithm, which is used when the
   function is optimized for speed.  When the function is optimized for size,
   in order to reduce long jumps and connect more fallthru edges, the
   algorithm is modified as follows:
   (1) Break long traces into short ones.  A trace is broken at a block that
   has multiple predecessors/successors during trace discovery.  When
   connecting traces, only connect Trace n with Trace n + 1.  This change
   reduces most long jumps compared with the above algorithm.
   (2) Ignore the edge probability and frequency for fallthru edges.
   (3) Keep the original order of blocks when there is no chance to fall
   through.  We rely on the results of cfg_cleanup.

   To implement the change for code size optimization, the block's index is
   used as the key and all traces are found in one round.

   References:

   "Software Trace Cache"
   A. Ramirez, J. Larriba-Pey, C. Navarro, J. Torrellas and M. Valero; 1999
   http://citeseer.nj.nec.com/15361.html

*/
80 
81 #include "config.h"
82 #include "system.h"
83 #include "coretypes.h"
84 #include "tm.h"
85 #include "tree.h"
86 #include "rtl.h"
87 #include "regs.h"
88 #include "flags.h"
89 #include "output.h"
90 #include "fibheap.h"
91 #include "target.h"
92 #include "function.h"
93 #include "tm_p.h"
94 #include "obstack.h"
95 #include "expr.h"
96 #include "params.h"
97 #include "diagnostic-core.h"
98 #include "toplev.h" /* user_defined_section_attribute */
99 #include "tree-pass.h"
100 #include "df.h"
101 #include "bb-reorder.h"
102 #include "except.h"
103 
104 /* The number of rounds.  In most cases there will only be 4 rounds, but
105    when partitioning hot and cold basic blocks into separate sections of
106    the object file there will be an extra round.  */
107 #define N_ROUNDS 5
108 
109 /* Stubs in case we don't have a return insn.
110    We have to check at run time too, not only compile time.  */
111 
112 #ifndef HAVE_return
113 #define HAVE_return 0
114 #define gen_return() NULL_RTX
115 #endif
116 
117 
118 struct target_bb_reorder default_target_bb_reorder;
119 #if SWITCHABLE_TARGET
120 struct target_bb_reorder *this_target_bb_reorder = &default_target_bb_reorder;
121 #endif
122 
123 #define uncond_jump_length \
124   (this_target_bb_reorder->x_uncond_jump_length)
125 
126 /* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE.  */
127 static const int branch_threshold[N_ROUNDS] = {400, 200, 100, 0, 0};
128 
129 /* Exec thresholds in thousandths (per mille) of the frequency of bb 0.  */
130 static const int exec_threshold[N_ROUNDS] = {500, 200, 50, 0, 0};
131 
/* If the edge frequency is lower than DUPLICATION_THRESHOLD per mille of the
   entry block frequency, the edge destination is not duplicated while
   connecting traces.  */
#define DUPLICATION_THRESHOLD 100
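
/* A worked example of how these thresholds are applied (illustrative only,
   assuming the usual REG_BR_PROB_BASE of 10000): in round 0 the branch
   threshold passed to find_traces_1_round is REG_BR_PROB_BASE * 400 / 1000
   == 4000, i.e. an edge needs at least a 40% probability for its destination
   to be added to the current trace rather than deferred to a later round,
   and the exec threshold is half of the maximal entry block frequency.  By
   round 3 both thresholds are zero, so every remaining block is picked up.
   Similarly, DUPLICATION_THRESHOLD of 100 means an edge must carry at least
   10% of the entry frequency (and count) before larger blocks are duplicated
   while connecting traces; tiny blocks may still be copied.  */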
135 
136 /* Structure to hold needed information for each basic block.  */
137 typedef struct bbro_basic_block_data_def
138 {
139   /* Which trace is the bb start of (-1 means it is not a start of any).  */
140   int start_of_trace;
141 
142   /* Which trace is the bb end of (-1 means it is not an end of any).  */
143   int end_of_trace;
144 
145   /* Which trace is the bb in?  */
146   int in_trace;
147 
148   /* Which trace was this bb visited in?  */
149   int visited;
150 
151   /* Which heap is BB in (if any)?  */
152   fibheap_t heap;
153 
154   /* Which heap node is BB in (if any)?  */
155   fibnode_t node;
156 } bbro_basic_block_data;
157 
158 /* The current size of the following dynamic array.  */
159 static int array_size;
160 
161 /* The array which holds needed information for basic blocks.  */
162 static bbro_basic_block_data *bbd;
163 
/* To avoid frequent reallocation the size of the array is greater than
   needed; the number of elements is (not less than) 1.25 * size_wanted.  */
#define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5)
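
/* For example (illustrative): GET_ARRAY_SIZE (100) is ((100 / 4) + 1) * 5
   == 130, which is at least the promised 1.25 * 100 == 125 elements, so
   roughly 25% slack is kept on top of the requested size.  */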
167 
168 /* Free the memory and set the pointer to NULL.  */
169 #define FREE(P) (gcc_assert (P), free (P), P = 0)
170 
171 /* Structure for holding information about a trace.  */
172 struct trace
173 {
174   /* First and last basic block of the trace.  */
175   basic_block first, last;
176 
177   /* The round of the STC creation which this trace was found in.  */
178   int round;
179 
180   /* The length (i.e. the number of basic blocks) of the trace.  */
181   int length;
182 };
183 
184 /* Maximum frequency and count of one of the entry blocks.  */
185 static int max_entry_frequency;
186 static gcov_type max_entry_count;
187 
188 /* Local function prototypes.  */
189 static void find_traces (int *, struct trace *);
190 static basic_block rotate_loop (edge, struct trace *, int);
191 static void mark_bb_visited (basic_block, int);
192 static void find_traces_1_round (int, int, gcov_type, struct trace *, int *,
193 				 int, fibheap_t *, int);
194 static basic_block copy_bb (basic_block, edge, basic_block, int);
195 static fibheapkey_t bb_to_key (basic_block);
196 static bool better_edge_p (const_basic_block, const_edge, int, int, int, int,
197 			   const_edge);
198 static bool connect_better_edge_p (const_edge, bool, int, const_edge,
199 				   struct trace *);
200 static void connect_traces (int, struct trace *);
201 static bool copy_bb_p (const_basic_block, int);
202 static bool push_to_next_round_p (const_basic_block, int, int, int, gcov_type);
203 
204 /* Return the trace number in which BB was visited.  */
205 
206 static int
bb_visited_trace (const_basic_block bb)
208 {
209   gcc_assert (bb->index < array_size);
210   return bbd[bb->index].visited;
211 }
212 
/* Mark BB as visited in trace number TRACE.  */

static void
mark_bb_visited (basic_block bb, int trace)
217 {
218   bbd[bb->index].visited = trace;
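  /* A visited block can no longer start a new trace, so remove it from
     whichever heap of candidate trace seeds it is currently waiting in.  */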
219   if (bbd[bb->index].heap)
220     {
221       fibheap_delete_node (bbd[bb->index].heap, bbd[bb->index].node);
222       bbd[bb->index].heap = NULL;
223       bbd[bb->index].node = NULL;
224     }
225 }
226 
/* Check whether BB should be pushed into the next round of trace
   collection.  Reasons for pushing the block forward are: 1) the block is
   cold, we are doing partitioning, and there will be another round (cold
   partition blocks are not supposed to be collected into traces until the
   very last round); or 2) there will be another round, and the basic block
   is not "hot enough" for the current round of trace collection.  */

static bool
push_to_next_round_p (const_basic_block bb, int round, int number_of_rounds,
		      int exec_th, gcov_type count_th)
238 {
239   bool there_exists_another_round;
240   bool block_not_hot_enough;
241 
242   there_exists_another_round = round < number_of_rounds - 1;
243 
244   block_not_hot_enough = (bb->frequency < exec_th
245 			  || bb->count < count_th
246 			  || probably_never_executed_bb_p (cfun, bb));
247 
248   if (there_exists_another_round
249       && block_not_hot_enough)
250     return true;
251   else
252     return false;
253 }
254 
/* Find the traces for Software Trace Cache.  Chain the basic blocks of each
   trace through their aux fields.  Store the number of traces in *N_TRACES
   and the descriptions of the traces in TRACES.  */

static void
find_traces (int *n_traces, struct trace *traces)
261 {
262   int i;
263   int number_of_rounds;
264   edge e;
265   edge_iterator ei;
266   fibheap_t heap;
267 
268   /* Add one extra round of trace collection when partitioning hot/cold
269      basic blocks into separate sections.  The last round is for all the
270      cold blocks (and ONLY the cold blocks).  */
271 
272   number_of_rounds = N_ROUNDS - 1;
273 
274   /* Insert entry points of function into heap.  */
275   heap = fibheap_new ();
276   max_entry_frequency = 0;
277   max_entry_count = 0;
278   FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
279     {
280       bbd[e->dest->index].heap = heap;
281       bbd[e->dest->index].node = fibheap_insert (heap, bb_to_key (e->dest),
282 						    e->dest);
283       if (e->dest->frequency > max_entry_frequency)
284 	max_entry_frequency = e->dest->frequency;
285       if (e->dest->count > max_entry_count)
286 	max_entry_count = e->dest->count;
287     }
288 
289   /* Find the traces.  */
290   for (i = 0; i < number_of_rounds; i++)
291     {
292       gcov_type count_threshold;
293 
294       if (dump_file)
295 	fprintf (dump_file, "STC - round %d\n", i + 1);
296 
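      /* Scale max_entry_count by exec_threshold[i] per mille.  When the
	 count is large, divide before multiplying so that the intermediate
	 product cannot overflow; this only costs a little precision.  */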
297       if (max_entry_count < INT_MAX / 1000)
298 	count_threshold = max_entry_count * exec_threshold[i] / 1000;
299       else
300 	count_threshold = max_entry_count / 1000 * exec_threshold[i];
301 
302       find_traces_1_round (REG_BR_PROB_BASE * branch_threshold[i] / 1000,
303 			   max_entry_frequency * exec_threshold[i] / 1000,
304 			   count_threshold, traces, n_traces, i, &heap,
305 			   number_of_rounds);
306     }
307   fibheap_delete (heap);
308 
309   if (dump_file)
310     {
311       for (i = 0; i < *n_traces; i++)
312 	{
313 	  basic_block bb;
314 	  fprintf (dump_file, "Trace %d (round %d):  ", i + 1,
315 		   traces[i].round + 1);
316 	  for (bb = traces[i].first;
317 	       bb != traces[i].last;
318 	       bb = (basic_block) bb->aux)
319 	    fprintf (dump_file, "%d [%d] ", bb->index, bb->frequency);
320 	  fprintf (dump_file, "%d [%d]\n", bb->index, bb->frequency);
321 	}
322       fflush (dump_file);
323     }
324 }
325 
/* Rotate the loop whose back edge is BACK_EDGE, in the tail of trace TRACE
   (with sequential number TRACE_N).  Return the new last block of the
   trace.  */
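
/* As an illustration (a sketch, not from the original sources): suppose the
   trace built so far is H -> X -> Y, where H is the loop header and Y->H is
   the back edge, so the caller has temporarily made the aux chain circular
   (Y->aux == H).  If the most frequent suitable edge leaving the loop exits
   from X, then BEST_BB is X and the rotation reorders the trace as
   Y -> H -> X, making X the last block of the trace; X->aux is then
   cleared.  */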
328 
329 static basic_block
rotate_loop (edge back_edge, struct trace *trace, int trace_n)
331 {
332   basic_block bb;
333 
334   /* Information about the best end (end after rotation) of the loop.  */
335   basic_block best_bb = NULL;
336   edge best_edge = NULL;
337   int best_freq = -1;
338   gcov_type best_count = -1;
339   /* The best edge is preferred when its destination is not visited yet
340      or is a start block of some trace.  */
341   bool is_preferred = false;
342 
343   /* Find the most frequent edge that goes out from current trace.  */
344   bb = back_edge->dest;
345   do
346     {
347       edge e;
348       edge_iterator ei;
349 
350       FOR_EACH_EDGE (e, ei, bb->succs)
351 	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
352 	    && bb_visited_trace (e->dest) != trace_n
353 	    && (e->flags & EDGE_CAN_FALLTHRU)
354 	    && !(e->flags & EDGE_COMPLEX))
355 	{
356 	  if (is_preferred)
357 	    {
358 	      /* The best edge is preferred.  */
359 	      if (!bb_visited_trace (e->dest)
360 		  || bbd[e->dest->index].start_of_trace >= 0)
361 		{
362 		  /* The current edge E is also preferred.  */
363 		  int freq = EDGE_FREQUENCY (e);
364 		  if (freq > best_freq || e->count > best_count)
365 		    {
366 		      best_freq = freq;
367 		      best_count = e->count;
368 		      best_edge = e;
369 		      best_bb = bb;
370 		    }
371 		}
372 	    }
373 	  else
374 	    {
375 	      if (!bb_visited_trace (e->dest)
376 		  || bbd[e->dest->index].start_of_trace >= 0)
377 		{
378 		  /* The current edge E is preferred.  */
379 		  is_preferred = true;
380 		  best_freq = EDGE_FREQUENCY (e);
381 		  best_count = e->count;
382 		  best_edge = e;
383 		  best_bb = bb;
384 		}
385 	      else
386 		{
387 		  int freq = EDGE_FREQUENCY (e);
388 		  if (!best_edge || freq > best_freq || e->count > best_count)
389 		    {
390 		      best_freq = freq;
391 		      best_count = e->count;
392 		      best_edge = e;
393 		      best_bb = bb;
394 		    }
395 		}
396 	    }
397 	}
398       bb = (basic_block) bb->aux;
399     }
400   while (bb != back_edge->dest);
401 
402   if (best_bb)
403     {
404       /* Rotate the loop so that the BEST_EDGE goes out from the last block of
405 	 the trace.  */
406       if (back_edge->dest == trace->first)
407 	{
408 	  trace->first = (basic_block) best_bb->aux;
409 	}
410       else
411 	{
412 	  basic_block prev_bb;
413 
414 	  for (prev_bb = trace->first;
415 	       prev_bb->aux != back_edge->dest;
416 	       prev_bb = (basic_block) prev_bb->aux)
417 	    ;
418 	  prev_bb->aux = best_bb->aux;
419 
420 	  /* Try to get rid of uncond jump to cond jump.  */
421 	  if (single_succ_p (prev_bb))
422 	    {
423 	      basic_block header = single_succ (prev_bb);
424 
	      /* Duplicate HEADER if it is a small block containing a cond
		 jump at the end.  */
427 	      if (any_condjump_p (BB_END (header)) && copy_bb_p (header, 0)
428 		  && !find_reg_note (BB_END (header), REG_CROSSING_JUMP,
429 				     NULL_RTX))
430 		copy_bb (header, single_succ_edge (prev_bb), prev_bb, trace_n);
431 	    }
432 	}
433     }
434   else
435     {
      /* We have not found a suitable loop tail, so no rotation is done.  */
437       best_bb = back_edge->src;
438     }
439   best_bb->aux = NULL;
440   return best_bb;
441 }
442 
/* One round of finding traces.  Find traces for BRANCH_TH and EXEC_TH, i.e.
   do not include in the traces basic blocks whose probability is lower than
   BRANCH_TH, whose frequency is lower than EXEC_TH, or whose count is lower
   than COUNT_TH.  Store the new traces in TRACES and modify the number of
   traces *N_TRACES.  Set the round (which the traces belong to) to ROUND.
   The function expects the starting basic blocks to be in *HEAP and will
   delete *HEAP and store the starting points for the next round in a new
   *HEAP.  */

static void
find_traces_1_round (int branch_th, int exec_th, gcov_type count_th,
		     struct trace *traces, int *n_traces, int round,
		     fibheap_t *heap, int number_of_rounds)
455 {
456   /* Heap for discarded basic blocks which are possible starting points for
457      the next round.  */
458   fibheap_t new_heap = fibheap_new ();
459   bool for_size = optimize_function_for_size_p (cfun);
460 
461   while (!fibheap_empty (*heap))
462     {
463       basic_block bb;
464       struct trace *trace;
465       edge best_edge, e;
466       fibheapkey_t key;
467       edge_iterator ei;
468 
469       bb = (basic_block) fibheap_extract_min (*heap);
470       bbd[bb->index].heap = NULL;
471       bbd[bb->index].node = NULL;
472 
473       if (dump_file)
474 	fprintf (dump_file, "Getting bb %d\n", bb->index);
475 
476       /* If the BB's frequency is too low, send BB to the next round.  When
477 	 partitioning hot/cold blocks into separate sections, make sure all
478 	 the cold blocks (and ONLY the cold blocks) go into the (extra) final
479 	 round.  When optimizing for size, do not push to next round.  */
480 
481       if (!for_size
482 	  && push_to_next_round_p (bb, round, number_of_rounds, exec_th,
483 				   count_th))
484 	{
485 	  int key = bb_to_key (bb);
486 	  bbd[bb->index].heap = new_heap;
487 	  bbd[bb->index].node = fibheap_insert (new_heap, key, bb);
488 
489 	  if (dump_file)
490 	    fprintf (dump_file,
491 		     "  Possible start point of next round: %d (key: %d)\n",
492 		     bb->index, key);
493 	  continue;
494 	}
495 
496       trace = traces + *n_traces;
497       trace->first = bb;
498       trace->round = round;
499       trace->length = 0;
500       bbd[bb->index].in_trace = *n_traces;
501       (*n_traces)++;
502 
503       do
504 	{
505 	  int prob, freq;
506 	  bool ends_in_call;
507 
508 	  /* The probability and frequency of the best edge.  */
509 	  int best_prob = INT_MIN / 2;
510 	  int best_freq = INT_MIN / 2;
511 
512 	  best_edge = NULL;
513 	  mark_bb_visited (bb, *n_traces);
514 	  trace->length++;
515 
516 	  if (dump_file)
517 	    fprintf (dump_file, "Basic block %d was visited in trace %d\n",
518 		     bb->index, *n_traces - 1);
519 
520 	  ends_in_call = block_ends_with_call_p (bb);
521 
522 	  /* Select the successor that will be placed after BB.  */
523 	  FOR_EACH_EDGE (e, ei, bb->succs)
524 	    {
525 	      gcc_assert (!(e->flags & EDGE_FAKE));
526 
527 	      if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
528 		continue;
529 
530 	      if (bb_visited_trace (e->dest)
531 		  && bb_visited_trace (e->dest) != *n_traces)
532 		continue;
533 
534 	      if (BB_PARTITION (e->dest) != BB_PARTITION (bb))
535 		continue;
536 
537 	      prob = e->probability;
538 	      freq = e->dest->frequency;
539 
540 	      /* The only sensible preference for a call instruction is the
541 		 fallthru edge.  Don't bother selecting anything else.  */
542 	      if (ends_in_call)
543 		{
544 		  if (e->flags & EDGE_CAN_FALLTHRU)
545 		    {
546 		      best_edge = e;
547 		      best_prob = prob;
548 		      best_freq = freq;
549 		    }
550 		  continue;
551 		}
552 
	      /* Skip an edge that cannot be a fallthru edge, is complex, or
		 leads to an improbable or infrequent successor (i.e. it is an
		 unsuitable successor).  When optimizing for size, ignore the
		 probability and frequency.  */
556 	      if (!(e->flags & EDGE_CAN_FALLTHRU) || (e->flags & EDGE_COMPLEX)
557 		  || ((prob < branch_th || EDGE_FREQUENCY (e) < exec_th
558 		      || e->count < count_th) && (!for_size)))
559 		continue;
560 
561 	      /* If partitioning hot/cold basic blocks, don't consider edges
562 		 that cross section boundaries.  */
563 
564 	      if (better_edge_p (bb, e, prob, freq, best_prob, best_freq,
565 				 best_edge))
566 		{
567 		  best_edge = e;
568 		  best_prob = prob;
569 		  best_freq = freq;
570 		}
571 	    }
572 
573 	  /* If the best destination has multiple predecessors, and can be
574 	     duplicated cheaper than a jump, don't allow it to be added
575 	     to a trace.  We'll duplicate it when connecting traces.  */
576 	  if (best_edge && EDGE_COUNT (best_edge->dest->preds) >= 2
577 	      && copy_bb_p (best_edge->dest, 0))
578 	    best_edge = NULL;
579 
	  /* If the best destination has multiple successors or predecessors,
	     don't allow it to be added when optimizing for size.  This makes
	     sure predecessors with smaller index are handled before the best
	     destination.  It breaks long traces and reduces long jumps.

	     Take if-then-else as an example.
		A
	       / \
	      B   C
	       \ /
		D
	     If we do not remove the best edge B->D/C->D, the final order might
	     be A B D ... C.  C is at the end of the program.  If D's successors
	     and D are complicated, we might need long jumps for A->C and C->D.
	     A similar issue exists for the order A C D ... B.

	     After removing the best edge, the final result will be ABCD/ACBD.
	     It does not add a jump compared with the previous order, but it
	     reduces the possibility of long jumps.  */
599 	  if (best_edge && for_size
600 	      && (EDGE_COUNT (best_edge->dest->succs) > 1
601 		 || EDGE_COUNT (best_edge->dest->preds) > 1))
602 	    best_edge = NULL;
603 
604 	  /* Add all non-selected successors to the heaps.  */
605 	  FOR_EACH_EDGE (e, ei, bb->succs)
606 	    {
607 	      if (e == best_edge
608 		  || e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
609 		  || bb_visited_trace (e->dest))
610 		continue;
611 
612 	      key = bb_to_key (e->dest);
613 
614 	      if (bbd[e->dest->index].heap)
615 		{
616 		  /* E->DEST is already in some heap.  */
617 		  if (key != bbd[e->dest->index].node->key)
618 		    {
619 		      if (dump_file)
620 			{
621 			  fprintf (dump_file,
622 				   "Changing key for bb %d from %ld to %ld.\n",
623 				   e->dest->index,
624 				   (long) bbd[e->dest->index].node->key,
625 				   key);
626 			}
627 		      fibheap_replace_key (bbd[e->dest->index].heap,
628 					   bbd[e->dest->index].node, key);
629 		    }
630 		}
631 	      else
632 		{
633 		  fibheap_t which_heap = *heap;
634 
635 		  prob = e->probability;
636 		  freq = EDGE_FREQUENCY (e);
637 
638 		  if (!(e->flags & EDGE_CAN_FALLTHRU)
639 		      || (e->flags & EDGE_COMPLEX)
640 		      || prob < branch_th || freq < exec_th
641 		      || e->count < count_th)
642 		    {
643 		      /* When partitioning hot/cold basic blocks, make sure
644 			 the cold blocks (and only the cold blocks) all get
645 			 pushed to the last round of trace collection.  When
646 			 optimizing for size, do not push to next round.  */
647 
648 		      if (!for_size && push_to_next_round_p (e->dest, round,
649 							     number_of_rounds,
650 							     exec_th, count_th))
651 			which_heap = new_heap;
652 		    }
653 
654 		  bbd[e->dest->index].heap = which_heap;
655 		  bbd[e->dest->index].node = fibheap_insert (which_heap,
656 								key, e->dest);
657 
658 		  if (dump_file)
659 		    {
660 		      fprintf (dump_file,
661 			       "  Possible start of %s round: %d (key: %ld)\n",
662 			       (which_heap == new_heap) ? "next" : "this",
663 			       e->dest->index, (long) key);
664 		    }
665 
666 		}
667 	    }
668 
669 	  if (best_edge) /* Suitable successor was found.  */
670 	    {
671 	      if (bb_visited_trace (best_edge->dest) == *n_traces)
672 		{
673 		  /* We do nothing with one basic block loops.  */
674 		  if (best_edge->dest != bb)
675 		    {
676 		      if (EDGE_FREQUENCY (best_edge)
677 			  > 4 * best_edge->dest->frequency / 5)
678 			{
679 			  /* The loop has at least 4 iterations.  If the loop
680 			     header is not the first block of the function
681 			     we can rotate the loop.  */
682 
683 			  if (best_edge->dest
684 			      != ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb)
685 			    {
686 			      if (dump_file)
687 				{
688 				  fprintf (dump_file,
689 					   "Rotating loop %d - %d\n",
690 					   best_edge->dest->index, bb->index);
691 				}
692 			      bb->aux = best_edge->dest;
693 			      bbd[best_edge->dest->index].in_trace =
694 							     (*n_traces) - 1;
695 			      bb = rotate_loop (best_edge, trace, *n_traces);
696 			    }
697 			}
698 		      else
699 			{
			  /* The loop has fewer than 4 iterations.  */
701 
702 			  if (single_succ_p (bb)
703 			      && copy_bb_p (best_edge->dest,
704 			      		    optimize_edge_for_speed_p
705 			      		    (best_edge)))
706 			    {
707 			      bb = copy_bb (best_edge->dest, best_edge, bb,
708 					    *n_traces);
709 			      trace->length++;
710 			    }
711 			}
712 		    }
713 
714 		  /* Terminate the trace.  */
715 		  break;
716 		}
717 	      else
718 		{
719 		  /* Check for a situation
720 
721 		    A
722 		   /|
723 		  B |
724 		   \|
725 		    C
726 
727 		  where
728 		  EDGE_FREQUENCY (AB) + EDGE_FREQUENCY (BC)
729 		    >= EDGE_FREQUENCY (AC).
730 		  (i.e. 2 * B->frequency >= EDGE_FREQUENCY (AC) )
731 		  Best ordering is then A B C.
732 
733 		  When optimizing for size, A B C is always the best order.
734 
735 		  This situation is created for example by:
736 
737 		  if (A) B;
738 		  C;
739 
740 		  */
741 
742 		  FOR_EACH_EDGE (e, ei, bb->succs)
743 		    if (e != best_edge
744 			&& (e->flags & EDGE_CAN_FALLTHRU)
745 			&& !(e->flags & EDGE_COMPLEX)
746 			&& !bb_visited_trace (e->dest)
747 			&& single_pred_p (e->dest)
748 			&& !(e->flags & EDGE_CROSSING)
749 			&& single_succ_p (e->dest)
750 			&& (single_succ_edge (e->dest)->flags
751 			    & EDGE_CAN_FALLTHRU)
752 			&& !(single_succ_edge (e->dest)->flags & EDGE_COMPLEX)
753 			&& single_succ (e->dest) == best_edge->dest
754 			&& (2 * e->dest->frequency >= EDGE_FREQUENCY (best_edge)
755 			    || for_size))
756 		      {
757 			best_edge = e;
758 			if (dump_file)
759 			  fprintf (dump_file, "Selecting BB %d\n",
760 				   best_edge->dest->index);
761 			break;
762 		      }
763 
764 		  bb->aux = best_edge->dest;
765 		  bbd[best_edge->dest->index].in_trace = (*n_traces) - 1;
766 		  bb = best_edge->dest;
767 		}
768 	    }
769 	}
770       while (best_edge);
771       trace->last = bb;
772       bbd[trace->first->index].start_of_trace = *n_traces - 1;
773       bbd[trace->last->index].end_of_trace = *n_traces - 1;
774 
      /* The trace is terminated so we have to recount the keys in the heap
	 (some blocks can have lower keys because one of their predecessors
	 is now the end of a trace).  */
778       FOR_EACH_EDGE (e, ei, bb->succs)
779 	{
780 	  if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
781 	      || bb_visited_trace (e->dest))
782 	    continue;
783 
784 	  if (bbd[e->dest->index].heap)
785 	    {
786 	      key = bb_to_key (e->dest);
787 	      if (key != bbd[e->dest->index].node->key)
788 		{
789 		  if (dump_file)
790 		    {
791 		      fprintf (dump_file,
792 			       "Changing key for bb %d from %ld to %ld.\n",
793 			       e->dest->index,
794 			       (long) bbd[e->dest->index].node->key, key);
795 		    }
796 		  fibheap_replace_key (bbd[e->dest->index].heap,
797 				       bbd[e->dest->index].node,
798 				       key);
799 		}
800 	    }
801 	}
802     }
803 
804   fibheap_delete (*heap);
805 
806   /* "Return" the new heap.  */
807   *heap = new_heap;
808 }
809 
/* Create a duplicate of the basic block OLD_BB and redirect edge E to it, add
   it to the trace after BB, mark OLD_BB visited and update the pass's data
   structures (TRACE is the number of the trace into which OLD_BB is
   duplicated).  */

static basic_block
copy_bb (basic_block old_bb, edge e, basic_block bb, int trace)
816 {
817   basic_block new_bb;
818 
819   new_bb = duplicate_block (old_bb, e, bb);
820   BB_COPY_PARTITION (new_bb, old_bb);
821 
822   gcc_assert (e->dest == new_bb);
823 
824   if (dump_file)
825     fprintf (dump_file,
826 	     "Duplicated bb %d (created bb %d)\n",
827 	     old_bb->index, new_bb->index);
828 
829   if (new_bb->index >= array_size
830       || last_basic_block_for_fn (cfun) > array_size)
831     {
832       int i;
833       int new_size;
834 
835       new_size = MAX (last_basic_block_for_fn (cfun), new_bb->index + 1);
836       new_size = GET_ARRAY_SIZE (new_size);
837       bbd = XRESIZEVEC (bbro_basic_block_data, bbd, new_size);
838       for (i = array_size; i < new_size; i++)
839 	{
840 	  bbd[i].start_of_trace = -1;
841 	  bbd[i].end_of_trace = -1;
842 	  bbd[i].in_trace = -1;
843 	  bbd[i].visited = 0;
844 	  bbd[i].heap = NULL;
845 	  bbd[i].node = NULL;
846 	}
847       array_size = new_size;
848 
849       if (dump_file)
850 	{
851 	  fprintf (dump_file,
852 		   "Growing the dynamic array to %d elements.\n",
853 		   array_size);
854 	}
855     }
856 
857   gcc_assert (!bb_visited_trace (e->dest));
858   mark_bb_visited (new_bb, trace);
859   new_bb->aux = bb->aux;
860   bb->aux = new_bb;
861 
862   bbd[new_bb->index].in_trace = trace;
863 
864   return new_bb;
865 }
866 
867 /* Compute and return the key (for the heap) of the basic block BB.  */
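
/* For instance (an illustrative sketch assuming the usual BB_FREQ_MAX of
   10000): a block with frequency 1000 that is entered from the end of an
   already-built trace through an edge of frequency 800 gets the key
   -(100 * 10000 + 100 * 800 + 1000) == -1081000, while the same block
   without such a predecessor would get only -1000.  Since the fibonacci heap
   extracts the minimum key first, the prioritized block becomes a trace seed
   much earlier.  */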
868 
869 static fibheapkey_t
bb_to_key (basic_block bb)
871 {
872   edge e;
873   edge_iterator ei;
874   int priority = 0;
875 
876   /* Use index as key to align with its original order.  */
877   if (optimize_function_for_size_p (cfun))
878     return bb->index;
879 
880   /* Do not start in probably never executed blocks.  */
881 
882   if (BB_PARTITION (bb) == BB_COLD_PARTITION
883       || probably_never_executed_bb_p (cfun, bb))
884     return BB_FREQ_MAX;
885 
886   /* Prefer blocks whose predecessor is an end of some trace
887      or whose predecessor edge is EDGE_DFS_BACK.  */
888   FOR_EACH_EDGE (e, ei, bb->preds)
889     {
890       if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
891 	   && bbd[e->src->index].end_of_trace >= 0)
892 	  || (e->flags & EDGE_DFS_BACK))
893 	{
894 	  int edge_freq = EDGE_FREQUENCY (e);
895 
896 	  if (edge_freq > priority)
897 	    priority = edge_freq;
898 	}
899     }
900 
901   if (priority)
902     /* The block with priority should have significantly lower key.  */
903     return -(100 * BB_FREQ_MAX + 100 * priority + bb->frequency);
904 
905   return -bb->frequency;
906 }
907 
/* Return true when the edge E from basic block BB is better than the
   temporary best edge (details are in the function).  The probability of
   edge E is PROB.  The frequency of the successor is FREQ.  The current best
   probability is BEST_PROB, the best frequency is BEST_FREQ.
   The edge is considered equivalent to the temporary best edge when PROB
   does not differ much from BEST_PROB; similarly for frequency.  */
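
/* A small worked example (illustrative only): with BEST_PROB == 6000 the
   band is +/- 600, so an edge with PROB above 6600 wins outright and one
   below 5400 loses outright.  An edge with PROB 6300 is "equivalent", so the
   successor frequencies are compared with a similar 10% band and the
   successor with the lower frequency is preferred.  */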
914 
915 static bool
better_edge_p (const_basic_block bb, const_edge e, int prob, int freq,
	       int best_prob, int best_freq, const_edge cur_best_edge)
918 {
919   bool is_better_edge;
920 
921   /* The BEST_* values do not have to be best, but can be a bit smaller than
922      maximum values.  */
923   int diff_prob = best_prob / 10;
924   int diff_freq = best_freq / 10;
925 
926   /* The smaller one is better to keep the original order.  */
927   if (optimize_function_for_size_p (cfun))
928     return !cur_best_edge
929 	   || cur_best_edge->dest->index > e->dest->index;
930 
931   if (prob > best_prob + diff_prob)
932     /* The edge has higher probability than the temporary best edge.  */
933     is_better_edge = true;
934   else if (prob < best_prob - diff_prob)
935     /* The edge has lower probability than the temporary best edge.  */
936     is_better_edge = false;
937   else if (freq < best_freq - diff_freq)
938     /* The edge and the temporary best edge  have almost equivalent
939        probabilities.  The higher frequency of a successor now means
940        that there is another edge going into that successor.
941        This successor has lower frequency so it is better.  */
942     is_better_edge = true;
943   else if (freq > best_freq + diff_freq)
944     /* This successor has higher frequency so it is worse.  */
945     is_better_edge = false;
946   else if (e->dest->prev_bb == bb)
947     /* The edges have equivalent probabilities and the successors
948        have equivalent frequencies.  Select the previous successor.  */
949     is_better_edge = true;
950   else
951     is_better_edge = false;
952 
953   /* If we are doing hot/cold partitioning, make sure that we always favor
954      non-crossing edges over crossing edges.  */
955 
956   if (!is_better_edge
957       && flag_reorder_blocks_and_partition
958       && cur_best_edge
959       && (cur_best_edge->flags & EDGE_CROSSING)
960       && !(e->flags & EDGE_CROSSING))
961     is_better_edge = true;
962 
963   return is_better_edge;
964 }
965 
966 /* Return true when the edge E is better than the temporary best edge
967    CUR_BEST_EDGE.  If SRC_INDEX_P is true, the function compares the src bb of
968    E and CUR_BEST_EDGE; otherwise it will compare the dest bb.
969    BEST_LEN is the trace length of src (or dest) bb in CUR_BEST_EDGE.
970    TRACES record the information about traces.
971    When optimizing for size, the edge with smaller index is better.
972    When optimizing for speed, the edge with bigger probability or longer trace
973    is better.  */
974 
975 static bool
connect_better_edge_p (const_edge e, bool src_index_p, int best_len,
		       const_edge cur_best_edge, struct trace *traces)
978 {
979   int e_index;
980   int b_index;
981   bool is_better_edge;
982 
983   if (!cur_best_edge)
984     return true;
985 
986   if (optimize_function_for_size_p (cfun))
987     {
988       e_index = src_index_p ? e->src->index : e->dest->index;
989       b_index = src_index_p ? cur_best_edge->src->index
990 			      : cur_best_edge->dest->index;
991       /* The smaller one is better to keep the original order.  */
992       return b_index > e_index;
993     }
994 
995   if (src_index_p)
996     {
997       e_index = e->src->index;
998 
999       if (e->probability > cur_best_edge->probability)
1000 	/* The edge has higher probability than the temporary best edge.  */
1001 	is_better_edge = true;
1002       else if (e->probability < cur_best_edge->probability)
1003 	/* The edge has lower probability than the temporary best edge.  */
1004 	is_better_edge = false;
1005       else if (traces[bbd[e_index].end_of_trace].length > best_len)
1006 	/* The edge and the temporary best edge have equivalent probabilities.
1007 	   The edge with longer trace is better.  */
1008 	is_better_edge = true;
1009       else
1010 	is_better_edge = false;
1011     }
1012   else
1013     {
1014       e_index = e->dest->index;
1015 
1016       if (e->probability > cur_best_edge->probability)
1017 	/* The edge has higher probability than the temporary best edge.  */
1018 	is_better_edge = true;
1019       else if (e->probability < cur_best_edge->probability)
1020 	/* The edge has lower probability than the temporary best edge.  */
1021 	is_better_edge = false;
1022       else if (traces[bbd[e_index].start_of_trace].length > best_len)
1023 	/* The edge and the temporary best edge have equivalent probabilities.
1024 	   The edge with longer trace is better.  */
1025 	is_better_edge = true;
1026       else
1027 	is_better_edge = false;
1028     }
1029 
1030   return is_better_edge;
1031 }
1032 
1033 /* Connect traces in array TRACES, N_TRACES is the count of traces.  */
1034 
1035 static void
connect_traces (int n_traces, struct trace *traces)
1037 {
1038   int i;
1039   bool *connected;
1040   bool two_passes;
1041   int last_trace;
1042   int current_pass;
1043   int current_partition;
1044   int freq_threshold;
1045   gcov_type count_threshold;
1046   bool for_size = optimize_function_for_size_p (cfun);
1047 
1048   freq_threshold = max_entry_frequency * DUPLICATION_THRESHOLD / 1000;
1049   if (max_entry_count < INT_MAX / 1000)
1050     count_threshold = max_entry_count * DUPLICATION_THRESHOLD / 1000;
1051   else
1052     count_threshold = max_entry_count / 1000 * DUPLICATION_THRESHOLD;
1053 
1054   connected = XCNEWVEC (bool, n_traces);
1055   last_trace = -1;
1056   current_pass = 1;
1057   current_partition = BB_PARTITION (traces[0].first);
1058   two_passes = false;
1059 
1060   if (crtl->has_bb_partition)
1061     for (i = 0; i < n_traces && !two_passes; i++)
1062       if (BB_PARTITION (traces[0].first)
1063 	  != BB_PARTITION (traces[i].first))
1064 	two_passes = true;
1065 
1066   for (i = 0; i < n_traces || (two_passes && current_pass == 1) ; i++)
1067     {
1068       int t = i;
1069       int t2;
1070       edge e, best;
1071       int best_len;
1072 
1073       if (i >= n_traces)
1074 	{
1075 	  gcc_assert (two_passes && current_pass == 1);
1076 	  i = 0;
1077 	  t = i;
1078 	  current_pass = 2;
1079 	  if (current_partition == BB_HOT_PARTITION)
1080 	    current_partition = BB_COLD_PARTITION;
1081 	  else
1082 	    current_partition = BB_HOT_PARTITION;
1083 	}
1084 
1085       if (connected[t])
1086 	continue;
1087 
1088       if (two_passes
1089 	  && BB_PARTITION (traces[t].first) != current_partition)
1090 	continue;
1091 
1092       connected[t] = true;
1093 
1094       /* Find the predecessor traces.  */
1095       for (t2 = t; t2 > 0;)
1096 	{
1097 	  edge_iterator ei;
1098 	  best = NULL;
1099 	  best_len = 0;
1100 	  FOR_EACH_EDGE (e, ei, traces[t2].first->preds)
1101 	    {
1102 	      int si = e->src->index;
1103 
1104 	      if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
1105 		  && (e->flags & EDGE_CAN_FALLTHRU)
1106 		  && !(e->flags & EDGE_COMPLEX)
1107 		  && bbd[si].end_of_trace >= 0
1108 		  && !connected[bbd[si].end_of_trace]
1109 		  && (BB_PARTITION (e->src) == current_partition)
1110 		  && connect_better_edge_p (e, true, best_len, best, traces))
1111 		{
1112 		  best = e;
1113 		  best_len = traces[bbd[si].end_of_trace].length;
1114 		}
1115 	    }
1116 	  if (best)
1117 	    {
1118 	      best->src->aux = best->dest;
1119 	      t2 = bbd[best->src->index].end_of_trace;
1120 	      connected[t2] = true;
1121 
1122 	      if (dump_file)
1123 		{
1124 		  fprintf (dump_file, "Connection: %d %d\n",
1125 			   best->src->index, best->dest->index);
1126 		}
1127 	    }
1128 	  else
1129 	    break;
1130 	}
1131 
1132       if (last_trace >= 0)
1133 	traces[last_trace].last->aux = traces[t2].first;
1134       last_trace = t;
1135 
1136       /* Find the successor traces.  */
1137       while (1)
1138 	{
1139 	  /* Find the continuation of the chain.  */
1140 	  edge_iterator ei;
1141 	  best = NULL;
1142 	  best_len = 0;
1143 	  FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1144 	    {
1145 	      int di = e->dest->index;
1146 
1147 	      if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1148 		  && (e->flags & EDGE_CAN_FALLTHRU)
1149 		  && !(e->flags & EDGE_COMPLEX)
1150 		  && bbd[di].start_of_trace >= 0
1151 		  && !connected[bbd[di].start_of_trace]
1152 		  && (BB_PARTITION (e->dest) == current_partition)
1153 		  && connect_better_edge_p (e, false, best_len, best, traces))
1154 		{
1155 		  best = e;
1156 		  best_len = traces[bbd[di].start_of_trace].length;
1157 		}
1158 	    }
1159 
1160 	  if (for_size)
1161 	    {
1162 	      if (!best)
1163 		/* Stop finding the successor traces.  */
1164 		break;
1165 
1166 	      /* It is OK to connect block n with block n + 1 or a block
1167 		 before n.  For others, only connect to the loop header.  */
1168 	      if (best->dest->index > (traces[t].last->index + 1))
1169 		{
1170 		  int count = EDGE_COUNT (best->dest->preds);
1171 
1172 		  FOR_EACH_EDGE (e, ei, best->dest->preds)
1173 		    if (e->flags & EDGE_DFS_BACK)
1174 		      count--;
1175 
1176 		  /* If dest has multiple predecessors, skip it.  We expect
1177 		     that one predecessor with smaller index connects with it
1178 		     later.  */
1179 		  if (count != 1)
1180 		    break;
1181 		}
1182 
1183 	      /* Only connect Trace n with Trace n + 1.  It is conservative
1184 		 to keep the order as close as possible to the original order.
1185 		 It also helps to reduce long jumps.  */
1186 	      if (last_trace != bbd[best->dest->index].start_of_trace - 1)
1187 		break;
1188 
1189 	      if (dump_file)
1190 		fprintf (dump_file, "Connection: %d %d\n",
1191 			 best->src->index, best->dest->index);
1192 
1193 	      t = bbd[best->dest->index].start_of_trace;
1194 	      traces[last_trace].last->aux = traces[t].first;
1195 	      connected[t] = true;
1196 	      last_trace = t;
1197 	    }
1198 	  else if (best)
1199 	    {
1200 	      if (dump_file)
1201 		{
1202 		  fprintf (dump_file, "Connection: %d %d\n",
1203 			   best->src->index, best->dest->index);
1204 		}
1205 	      t = bbd[best->dest->index].start_of_trace;
1206 	      traces[last_trace].last->aux = traces[t].first;
1207 	      connected[t] = true;
1208 	      last_trace = t;
1209 	    }
1210 	  else
1211 	    {
1212 	      /* Try to connect the traces by duplication of 1 block.  */
1213 	      edge e2;
1214 	      basic_block next_bb = NULL;
1215 	      bool try_copy = false;
1216 
1217 	      FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1218 		if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1219 		    && (e->flags & EDGE_CAN_FALLTHRU)
1220 		    && !(e->flags & EDGE_COMPLEX)
1221 		    && (!best || e->probability > best->probability))
1222 		  {
1223 		    edge_iterator ei;
1224 		    edge best2 = NULL;
1225 		    int best2_len = 0;
1226 
1227 		    /* If the destination is a start of a trace which is only
1228 		       one block long, then no need to search the successor
1229 		       blocks of the trace.  Accept it.  */
1230 		    if (bbd[e->dest->index].start_of_trace >= 0
1231 			&& traces[bbd[e->dest->index].start_of_trace].length
1232 			   == 1)
1233 		      {
1234 			best = e;
1235 			try_copy = true;
1236 			continue;
1237 		      }
1238 
1239 		    FOR_EACH_EDGE (e2, ei, e->dest->succs)
1240 		      {
1241 			int di = e2->dest->index;
1242 
1243 			if (e2->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
1244 			    || ((e2->flags & EDGE_CAN_FALLTHRU)
1245 				&& !(e2->flags & EDGE_COMPLEX)
1246 				&& bbd[di].start_of_trace >= 0
1247 				&& !connected[bbd[di].start_of_trace]
1248 				&& BB_PARTITION (e2->dest) == current_partition
1249 				&& EDGE_FREQUENCY (e2) >= freq_threshold
1250 				&& e2->count >= count_threshold
1251 				&& (!best2
1252 				    || e2->probability > best2->probability
1253 				    || (e2->probability == best2->probability
1254 					&& traces[bbd[di].start_of_trace].length
1255 					   > best2_len))))
1256 			  {
1257 			    best = e;
1258 			    best2 = e2;
1259 			    if (e2->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
1260 			      best2_len = traces[bbd[di].start_of_trace].length;
1261 			    else
1262 			      best2_len = INT_MAX;
1263 			    next_bb = e2->dest;
1264 			    try_copy = true;
1265 			  }
1266 		      }
1267 		  }
1268 
1269 	      if (crtl->has_bb_partition)
1270 		try_copy = false;
1271 
1272 	      /* Copy tiny blocks always; copy larger blocks only when the
1273 		 edge is traversed frequently enough.  */
1274 	      if (try_copy
1275 		  && copy_bb_p (best->dest,
1276 				optimize_edge_for_speed_p (best)
1277 				&& EDGE_FREQUENCY (best) >= freq_threshold
1278 				&& best->count >= count_threshold))
1279 		{
1280 		  basic_block new_bb;
1281 
1282 		  if (dump_file)
1283 		    {
1284 		      fprintf (dump_file, "Connection: %d %d ",
1285 			       traces[t].last->index, best->dest->index);
1286 		      if (!next_bb)
1287 			fputc ('\n', dump_file);
1288 		      else if (next_bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
1289 			fprintf (dump_file, "exit\n");
1290 		      else
1291 			fprintf (dump_file, "%d\n", next_bb->index);
1292 		    }
1293 
1294 		  new_bb = copy_bb (best->dest, best, traces[t].last, t);
1295 		  traces[t].last = new_bb;
1296 		  if (next_bb && next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
1297 		    {
1298 		      t = bbd[next_bb->index].start_of_trace;
1299 		      traces[last_trace].last->aux = traces[t].first;
1300 		      connected[t] = true;
1301 		      last_trace = t;
1302 		    }
1303 		  else
1304 		    break;	/* Stop finding the successor traces.  */
1305 		}
1306 	      else
1307 		break;	/* Stop finding the successor traces.  */
1308 	    }
1309 	}
1310     }
1311 
1312   if (dump_file)
1313     {
1314       basic_block bb;
1315 
1316       fprintf (dump_file, "Final order:\n");
1317       for (bb = traces[0].first; bb; bb = (basic_block) bb->aux)
1318 	fprintf (dump_file, "%d ", bb->index);
1319       fprintf (dump_file, "\n");
1320       fflush (dump_file);
1321     }
1322 
1323   FREE (connected);
1324 }
1325 
1326 /* Return true when BB can and should be copied. CODE_MAY_GROW is true
1327    when code size is allowed to grow by duplication.  */
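
/* As a rough illustration: the size limit is one unconditional jump, so only
   blocks whose insns have a combined minimum length no larger than a jump
   are normally copied.  When CODE_MAY_GROW is set and the block is optimized
   for speed, the limit is multiplied by PARAM_MAX_GROW_COPY_BB_INSNS (whose
   default is assumed here to be 8), allowing blocks of up to roughly eight
   jump lengths to be duplicated.  */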
1328 
1329 static bool
copy_bb_p (const_basic_block bb, int code_may_grow)
1331 {
1332   int size = 0;
1333   int max_size = uncond_jump_length;
1334   rtx insn;
1335 
1336   if (!bb->frequency)
1337     return false;
1338   if (EDGE_COUNT (bb->preds) < 2)
1339     return false;
1340   if (!can_duplicate_block_p (bb))
1341     return false;
1342 
1343   /* Avoid duplicating blocks which have many successors (PR/13430).  */
1344   if (EDGE_COUNT (bb->succs) > 8)
1345     return false;
1346 
1347   if (code_may_grow && optimize_bb_for_speed_p (bb))
1348     max_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);
1349 
1350   FOR_BB_INSNS (bb, insn)
1351     {
1352       if (INSN_P (insn))
1353 	size += get_attr_min_length (insn);
1354     }
1355 
1356   if (size <= max_size)
1357     return true;
1358 
1359   if (dump_file)
1360     {
1361       fprintf (dump_file,
1362 	       "Block %d can't be copied because its size = %d.\n",
1363 	       bb->index, size);
1364     }
1365 
1366   return false;
1367 }
1368 
/* Return the length of an unconditional jump instruction.  */

int
get_uncond_jump_length (void)
1373 {
1374   rtx label, jump;
1375   int length;
1376 
1377   label = emit_label_before (gen_label_rtx (), get_insns ());
1378   jump = emit_jump_insn (gen_jump (label));
1379 
1380   length = get_attr_min_length (jump);
1381 
1382   delete_insn (jump);
1383   delete_insn (label);
1384   return length;
1385 }
1386 
1387 /* The landing pad OLD_LP, in block OLD_BB, has edges from both partitions.
1388    Duplicate the landing pad and split the edges so that no EH edge
1389    crosses partitions.  */
1390 
1391 static void
fix_up_crossing_landing_pad (eh_landing_pad old_lp, basic_block old_bb)
1393 {
1394   eh_landing_pad new_lp;
1395   basic_block new_bb, last_bb, post_bb;
1396   rtx new_label, jump, post_label;
1397   unsigned new_partition;
1398   edge_iterator ei;
1399   edge e;
1400 
1401   /* Generate the new landing-pad structure.  */
1402   new_lp = gen_eh_landing_pad (old_lp->region);
1403   new_lp->post_landing_pad = old_lp->post_landing_pad;
1404   new_lp->landing_pad = gen_label_rtx ();
1405   LABEL_PRESERVE_P (new_lp->landing_pad) = 1;
1406 
1407   /* Put appropriate instructions in new bb.  */
1408   new_label = emit_label (new_lp->landing_pad);
1409 
1410   expand_dw2_landing_pad_for_region (old_lp->region);
1411 
1412   post_bb = BLOCK_FOR_INSN (old_lp->landing_pad);
1413   post_bb = single_succ (post_bb);
1414   post_label = block_label (post_bb);
1415   jump = emit_jump_insn (gen_jump (post_label));
1416   JUMP_LABEL (jump) = post_label;
1417 
1418   /* Create new basic block to be dest for lp.  */
1419   last_bb = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
1420   new_bb = create_basic_block (new_label, jump, last_bb);
1421   new_bb->aux = last_bb->aux;
1422   last_bb->aux = new_bb;
1423 
1424   emit_barrier_after_bb (new_bb);
1425 
1426   make_edge (new_bb, post_bb, 0);
1427 
1428   /* Make sure new bb is in the other partition.  */
1429   new_partition = BB_PARTITION (old_bb);
1430   new_partition ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
1431   BB_SET_PARTITION (new_bb, new_partition);
1432 
1433   /* Fix up the edges.  */
1434   for (ei = ei_start (old_bb->preds); (e = ei_safe_edge (ei)) != NULL; )
1435     if (BB_PARTITION (e->src) == new_partition)
1436       {
1437 	rtx insn = BB_END (e->src);
1438 	rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
1439 
1440 	gcc_assert (note != NULL);
1441 	gcc_checking_assert (INTVAL (XEXP (note, 0)) == old_lp->index);
1442 	XEXP (note, 0) = GEN_INT (new_lp->index);
1443 
1444 	/* Adjust the edge to the new destination.  */
1445 	redirect_edge_succ (e, new_bb);
1446       }
1447     else
1448       ei_next (&ei);
1449 }
1450 
1451 
1452 /* Ensure that all hot bbs are included in a hot path through the
1453    procedure. This is done by calling this function twice, once
1454    with WALK_UP true (to look for paths from the entry to hot bbs) and
1455    once with WALK_UP false (to look for paths from hot bbs to the exit).
1456    Returns the updated value of COLD_BB_COUNT and adds newly-hot bbs
1457    to BBS_IN_HOT_PARTITION.  */
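
/* For example (an illustrative sketch): when WALK_UP is true and a hot block
   is reachable only through cold predecessors (ignoring DFS back edges), the
   hottest such predecessor (judged by count, then frequency, then
   probability) is re-marked hot, COLD_BB_COUNT is decremented, and the
   newly-hot block is queued so that its own predecessors are checked in
   turn.  */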
1458 
1459 static unsigned int
sanitize_hot_paths (bool walk_up, unsigned int cold_bb_count,
                    vec<basic_block> *bbs_in_hot_partition)
1462 {
1463   /* Callers check this.  */
1464   gcc_checking_assert (cold_bb_count);
1465 
1466   /* Keep examining hot bbs while we still have some left to check
1467      and there are remaining cold bbs.  */
1468   vec<basic_block> hot_bbs_to_check = bbs_in_hot_partition->copy ();
1469   while (! hot_bbs_to_check.is_empty ()
1470          && cold_bb_count)
1471     {
1472       basic_block bb = hot_bbs_to_check.pop ();
1473       vec<edge, va_gc> *edges = walk_up ? bb->preds : bb->succs;
1474       edge e;
1475       edge_iterator ei;
1476       int highest_probability = 0;
1477       int highest_freq = 0;
1478       gcov_type highest_count = 0;
1479       bool found = false;
1480 
1481       /* Walk the preds/succs and check if there is at least one already
1482          marked hot. Keep track of the most frequent pred/succ so that we
1483          can mark it hot if we don't find one.  */
1484       FOR_EACH_EDGE (e, ei, edges)
1485         {
1486           basic_block reach_bb = walk_up ? e->src : e->dest;
1487 
1488           if (e->flags & EDGE_DFS_BACK)
1489             continue;
1490 
1491           if (BB_PARTITION (reach_bb) != BB_COLD_PARTITION)
1492           {
1493             found = true;
1494             break;
1495           }
          /* The following loop will look for the hottest edge via
             the edge count; if it is non-zero, then fall back to the edge
             frequency and finally the edge probability.  */
1499           if (e->count > highest_count)
1500             highest_count = e->count;
1501           int edge_freq = EDGE_FREQUENCY (e);
1502           if (edge_freq > highest_freq)
1503             highest_freq = edge_freq;
1504           if (e->probability > highest_probability)
1505             highest_probability = e->probability;
1506         }
1507 
1508       /* If bb is reached by (or reaches, in the case of !WALK_UP) another hot
1509          block (or unpartitioned, e.g. the entry block) then it is ok. If not,
1510          then the most frequent pred (or succ) needs to be adjusted.  In the
1511          case where multiple preds/succs have the same frequency (e.g. a
1512          50-50 branch), then both will be adjusted.  */
1513       if (found)
1514         continue;
1515 
1516       FOR_EACH_EDGE (e, ei, edges)
1517         {
1518           if (e->flags & EDGE_DFS_BACK)
1519             continue;
          /* Select the hottest edge using the edge count; if it is non-zero,
             then fall back to the edge frequency and finally the edge
             probability.  */
1523           if (highest_count)
1524             {
1525               if (e->count < highest_count)
1526                 continue;
1527             }
1528           else if (highest_freq)
1529             {
1530               if (EDGE_FREQUENCY (e) < highest_freq)
1531                 continue;
1532             }
1533           else if (e->probability < highest_probability)
1534             continue;
1535 
1536           basic_block reach_bb = walk_up ? e->src : e->dest;
1537 
1538           /* We have a hot bb with an immediate dominator that is cold.
1539              The dominator needs to be re-marked hot.  */
1540           BB_SET_PARTITION (reach_bb, BB_HOT_PARTITION);
1541           cold_bb_count--;
1542 
1543           /* Now we need to examine newly-hot reach_bb to see if it is also
1544              dominated by a cold bb.  */
1545           bbs_in_hot_partition->safe_push (reach_bb);
1546           hot_bbs_to_check.safe_push (reach_bb);
1547         }
1548     }
1549 
1550   return cold_bb_count;
1551 }
1552 
1553 
1554 /* Find the basic blocks that are rarely executed and need to be moved to
1555    a separate section of the .o file (to cut down on paging and improve
1556    cache locality).  Return a vector of all edges that cross.  */
1557 
1558 static vec<edge>
find_rarely_executed_basic_blocks_and_crossing_edges (void)
1560 {
1561   vec<edge> crossing_edges = vNULL;
1562   basic_block bb;
1563   edge e;
1564   edge_iterator ei;
1565   unsigned int cold_bb_count = 0;
1566   vec<basic_block> bbs_in_hot_partition = vNULL;
1567 
1568   /* Mark which partition (hot/cold) each basic block belongs in.  */
1569   FOR_EACH_BB_FN (bb, cfun)
1570     {
1571       bool cold_bb = false;
1572 
1573       if (probably_never_executed_bb_p (cfun, bb))
1574         {
1575           /* Handle profile insanities created by upstream optimizations
1576              by also checking the incoming edge weights. If there is a non-cold
1577              incoming edge, conservatively prevent this block from being split
1578              into the cold section.  */
1579           cold_bb = true;
1580           FOR_EACH_EDGE (e, ei, bb->preds)
1581             if (!probably_never_executed_edge_p (cfun, e))
1582               {
1583                 cold_bb = false;
1584                 break;
1585               }
1586         }
1587       if (cold_bb)
1588         {
1589           BB_SET_PARTITION (bb, BB_COLD_PARTITION);
1590           cold_bb_count++;
1591         }
1592       else
1593         {
1594           BB_SET_PARTITION (bb, BB_HOT_PARTITION);
1595           bbs_in_hot_partition.safe_push (bb);
1596         }
1597     }
1598 
  /* Ensure that hot bbs are included along a hot path from the entry to exit.
     Several different situations can leave cold bbs along all paths to/from a
     hot bb.  One is that there are edge weight insanities due to optimization
     phases that do not properly update basic block profile counts.  The second
     is that the entry of the function may not be hot, because it is entered
     fewer times than the number of profile training runs, but there is a loop
     inside the function that causes blocks within the function to be above the
     threshold for hotness.  This is fixed by walking up from hot bbs to the
     entry block, and then down from hot bbs to the exit, performing
     partitioning fixups as necessary.  */
1609   if (cold_bb_count)
1610     {
1611       mark_dfs_back_edges ();
1612       cold_bb_count = sanitize_hot_paths (true, cold_bb_count,
1613                                           &bbs_in_hot_partition);
1614       if (cold_bb_count)
1615         sanitize_hot_paths (false, cold_bb_count, &bbs_in_hot_partition);
1616     }
1617 
  /* The format of .gcc_except_table does not allow landing pads to
     be in a different partition than the throw.  Fix this by either
     moving or duplicating the landing pads.  */
1621   if (cfun->eh->lp_array)
1622     {
1623       unsigned i;
1624       eh_landing_pad lp;
1625 
1626       FOR_EACH_VEC_ELT (*cfun->eh->lp_array, i, lp)
1627 	{
1628 	  bool all_same, all_diff;
1629 
1630 	  if (lp == NULL
1631 	      || lp->landing_pad == NULL_RTX
1632 	      || !LABEL_P (lp->landing_pad))
1633 	    continue;
1634 
1635 	  all_same = all_diff = true;
1636 	  bb = BLOCK_FOR_INSN (lp->landing_pad);
1637 	  FOR_EACH_EDGE (e, ei, bb->preds)
1638 	    {
1639 	      gcc_assert (e->flags & EDGE_EH);
1640 	      if (BB_PARTITION (bb) == BB_PARTITION (e->src))
1641 		all_diff = false;
1642 	      else
1643 		all_same = false;
1644 	    }
1645 
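	  /* If every EH predecessor is in the same partition as BB, nothing
	     needs to change.  If every predecessor is in the other partition,
	     simply flip BB's partition.  Otherwise the predecessors are mixed
	     and the landing pad has to be moved or duplicated.  */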
1646 	  if (all_same)
1647 	    ;
1648 	  else if (all_diff)
1649 	    {
1650 	      int which = BB_PARTITION (bb);
1651 	      which ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
1652 	      BB_SET_PARTITION (bb, which);
1653 	    }
1654 	  else
1655 	    fix_up_crossing_landing_pad (lp, bb);
1656 	}
1657     }
1658 
1659   /* Mark every edge that crosses between sections.  */
1660 
1661   FOR_EACH_BB_FN (bb, cfun)
1662     FOR_EACH_EDGE (e, ei, bb->succs)
1663       {
1664 	unsigned int flags = e->flags;
1665 
1666         /* We should never have EDGE_CROSSING set yet.  */
1667 	gcc_checking_assert ((flags & EDGE_CROSSING) == 0);
1668 
1669 	if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
1670 	    && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1671 	    && BB_PARTITION (e->src) != BB_PARTITION (e->dest))
1672 	  {
1673 	    crossing_edges.safe_push (e);
1674 	    flags |= EDGE_CROSSING;
1675 	  }
1676 
1677 	/* Now that we've split eh edges as appropriate, allow landing pads
1678 	   to be merged with the post-landing pads.  */
1679 	flags &= ~EDGE_PRESERVE;
1680 
1681 	e->flags = flags;
1682       }
1683 
1684   return crossing_edges;
1685 }
1686 
1687 /* Set the flag EDGE_CAN_FALLTHRU for edges that can be fallthru.  */
1688 
static void
set_edge_can_fallthru_flag (void)
1691 {
1692   basic_block bb;
1693 
1694   FOR_EACH_BB_FN (bb, cfun)
1695     {
1696       edge e;
1697       edge_iterator ei;
1698 
1699       FOR_EACH_EDGE (e, ei, bb->succs)
1700 	{
1701 	  e->flags &= ~EDGE_CAN_FALLTHRU;
1702 
	  /* The FALLTHRU edge is also a CAN_FALLTHRU edge.  */
1704 	  if (e->flags & EDGE_FALLTHRU)
1705 	    e->flags |= EDGE_CAN_FALLTHRU;
1706 	}
1707 
      /* If the BB ends with an invertible condjump, all (2) edges are
	 CAN_FALLTHRU edges.  */
1710       if (EDGE_COUNT (bb->succs) != 2)
1711 	continue;
1712       if (!any_condjump_p (BB_END (bb)))
1713 	continue;
1714       if (!invert_jump (BB_END (bb), JUMP_LABEL (BB_END (bb)), 0))
1715 	continue;
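      /* invert_jump succeeded, so the condjump really was inverted; invert
	 it back so the insn stream is left unchanged before marking both
	 edges as CAN_FALLTHRU.  */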
1716       invert_jump (BB_END (bb), JUMP_LABEL (BB_END (bb)), 0);
1717       EDGE_SUCC (bb, 0)->flags |= EDGE_CAN_FALLTHRU;
1718       EDGE_SUCC (bb, 1)->flags |= EDGE_CAN_FALLTHRU;
1719     }
1720 }
1721 
/* If any destination of a crossing edge does not have a label, add a label;
   convert any easy fall-through crossing edges to unconditional jumps.  */
1724 
static void
add_labels_and_missing_jumps (vec<edge> crossing_edges)
1727 {
1728   size_t i;
1729   edge e;
1730 
1731   FOR_EACH_VEC_ELT (crossing_edges, i, e)
1732     {
1733       basic_block src = e->src;
1734       basic_block dest = e->dest;
1735       rtx label, new_jump;
1736 
1737       if (dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
1738 	continue;
1739 
1740       /* Make sure dest has a label.  */
1741       label = block_label (dest);
1742 
1743       /* Nothing to do for non-fallthru edges.  */
1744       if (src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
1745 	continue;
1746       if ((e->flags & EDGE_FALLTHRU) == 0)
1747 	continue;
1748 
      /* If the block does not end with a control flow insn, then we
	 can trivially add a jump to the end to fix up the crossing.
	 Otherwise the jump will have to go in a new bb, which will
	 be handled by the fix_up_fall_thru_edges function.  */
1753       if (control_flow_insn_p (BB_END (src)))
1754 	continue;
1755 
1756       /* Make sure there's only one successor.  */
1757       gcc_assert (single_succ_p (src));
1758 
1759       new_jump = emit_jump_insn_after (gen_jump (label), BB_END (src));
1760       BB_END (src) = new_jump;
1761       JUMP_LABEL (new_jump) = label;
1762       LABEL_NUSES (label) += 1;
1763 
1764       emit_barrier_after_bb (src);
1765 
1766       /* Mark edge as non-fallthru.  */
1767       e->flags &= ~EDGE_FALLTHRU;
1768     }
1769 }
1770 
/* Find any bb's where the fall-through edge is a crossing edge (note that
   these bb's must also contain a conditional jump or end with a call
   instruction; we've already dealt with fall-through edges for blocks
   that didn't have a conditional jump or didn't end with a call instruction
   in the call to add_labels_and_missing_jumps).  Convert the fall-through
   edge to a non-crossing edge by inserting a new bb to fall through into.
   The new bb will contain an unconditional jump (crossing edge) to the
   original fall-through destination.  */
1779 
static void
fix_up_fall_thru_edges (void)
1782 {
1783   basic_block cur_bb;
1784   basic_block new_bb;
1785   edge succ1;
1786   edge succ2;
1787   edge fall_thru;
1788   edge cond_jump = NULL;
1789   edge e;
1790   bool cond_jump_crosses;
1791   int invert_worked;
1792   rtx old_jump;
1793   rtx fall_thru_label;
1794 
1795   FOR_EACH_BB_FN (cur_bb, cfun)
1796     {
1797       fall_thru = NULL;
1798       if (EDGE_COUNT (cur_bb->succs) > 0)
1799 	succ1 = EDGE_SUCC (cur_bb, 0);
1800       else
1801 	succ1 = NULL;
1802 
1803       if (EDGE_COUNT (cur_bb->succs) > 1)
1804 	succ2 = EDGE_SUCC (cur_bb, 1);
1805       else
1806 	succ2 = NULL;
1807 
1808       /* Find the fall-through edge.  */
1809 
1810       if (succ1
1811 	  && (succ1->flags & EDGE_FALLTHRU))
1812 	{
1813 	  fall_thru = succ1;
1814 	  cond_jump = succ2;
1815 	}
1816       else if (succ2
1817 	       && (succ2->flags & EDGE_FALLTHRU))
1818 	{
1819 	  fall_thru = succ2;
1820 	  cond_jump = succ1;
1821 	}
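      /* A block that ends in a call or in an insn that can throw internally
	 may have more than two successors (e.g. an EH edge); search all of
	 them for the fall-through edge.  */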
1822       else if (succ1
1823 	       && (block_ends_with_call_p (cur_bb)
1824 		   || can_throw_internal (BB_END (cur_bb))))
1825 	{
1826 	  edge e;
1827 	  edge_iterator ei;
1828 
1829 	  FOR_EACH_EDGE (e, ei, cur_bb->succs)
1830 	    if (e->flags & EDGE_FALLTHRU)
1831 	      {
1832 		fall_thru = e;
1833 		break;
1834 	      }
1835 	}
1836 
1837       if (fall_thru && (fall_thru->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)))
1838 	{
1839 	  /* Check to see if the fall-thru edge is a crossing edge.  */
1840 
1841 	  if (fall_thru->flags & EDGE_CROSSING)
1842 	    {
1843 	      /* The fall_thru edge crosses; now check the cond jump edge, if
1844 		 it exists.  */
1845 
1846 	      cond_jump_crosses = true;
1847 	      invert_worked  = 0;
1848 	      old_jump = BB_END (cur_bb);
1849 
1850 	      /* Find the jump instruction, if there is one.  */
1851 
1852 	      if (cond_jump)
1853 		{
1854 		  if (!(cond_jump->flags & EDGE_CROSSING))
1855 		    cond_jump_crosses = false;
1856 
1857 		  /* We know the fall-thru edge crosses; if the cond
1858 		     jump edge does NOT cross, and its destination is the
1859 		     next block in the bb order, invert the jump
1860 		     (i.e. fix it so the fall through does not cross and
1861 		     the cond jump does).  */
1862 
1863 		  if (!cond_jump_crosses)
1864 		    {
1865 		      /* Find label in fall_thru block. We've already added
1866 			 any missing labels, so there must be one.  */
1867 
1868 		      fall_thru_label = block_label (fall_thru->dest);
1869 
1870 		      if (old_jump && JUMP_P (old_jump) && fall_thru_label)
1871 			invert_worked = invert_jump (old_jump,
						     fall_thru_label, 0);
1873 		      if (invert_worked)
1874 			{
1875 			  fall_thru->flags &= ~EDGE_FALLTHRU;
1876 			  cond_jump->flags |= EDGE_FALLTHRU;
1877 			  update_br_prob_note (cur_bb);
1878 			  e = fall_thru;
1879 			  fall_thru = cond_jump;
1880 			  cond_jump = e;
1881 			  cond_jump->flags |= EDGE_CROSSING;
1882 			  fall_thru->flags &= ~EDGE_CROSSING;
1883 			}
1884 		    }
1885 		}
1886 
1887 	      if (cond_jump_crosses || !invert_worked)
1888 		{
1889 		  /* This is the case where both edges out of the basic
1890 		     block are crossing edges. Here we will fix up the
1891 		     fall through edge. The jump edge will be taken care
		     of later.  The EDGE_CROSSING flag of the fall_thru edge
		     is cleared before the call to force_nonfallthru because,
		     if a new basic block is created, this edge stays within
		     the current section while the edge between new_bb and
		     fall_thru->dest becomes the EDGE_CROSSING one.  */
1898 
1899 		  fall_thru->flags &= ~EDGE_CROSSING;
1900 		  new_bb = force_nonfallthru (fall_thru);
1901 
1902 		  if (new_bb)
1903 		    {
1904 		      new_bb->aux = cur_bb->aux;
1905 		      cur_bb->aux = new_bb;
1906 
1907                       /* This is done by force_nonfallthru_and_redirect.  */
1908 		      gcc_assert (BB_PARTITION (new_bb)
1909                                   == BB_PARTITION (cur_bb));
1910 
1911 		      single_succ_edge (new_bb)->flags |= EDGE_CROSSING;
1912 		    }
1913 		  else
1914 		    {
		      /* If a new basic block was not created, restore
			 the EDGE_CROSSING flag.  */
1917 		      fall_thru->flags |= EDGE_CROSSING;
1918 		    }
1919 
		  /* Add a barrier after the new jump.  */
1921 		  emit_barrier_after_bb (new_bb ? new_bb : cur_bb);
1922 		}
1923 	    }
1924 	}
1925     }
1926 }
1927 
1928 /* This function checks the destination block of a "crossing jump" to
1929    see if it has any crossing predecessors that begin with a code label
1930    and end with an unconditional jump.  If so, it returns that predecessor
1931    block.  (This is to avoid creating lots of new basic blocks that all
1932    contain unconditional jumps to the same destination).  */
1933 
static basic_block
find_jump_block (basic_block jump_dest)
1936 {
1937   basic_block source_bb = NULL;
1938   edge e;
1939   rtx insn;
1940   edge_iterator ei;
1941 
1942   FOR_EACH_EDGE (e, ei, jump_dest->preds)
1943     if (e->flags & EDGE_CROSSING)
1944       {
1945 	basic_block src = e->src;
1946 
1947 	/* Check each predecessor to see if it has a label, and contains
1948 	   only one executable instruction, which is an unconditional jump.
1949 	   If so, we can use it.  */
1950 
1951 	if (LABEL_P (BB_HEAD (src)))
1952 	  for (insn = BB_HEAD (src);
1953 	       !INSN_P (insn) && insn != NEXT_INSN (BB_END (src));
1954 	       insn = NEXT_INSN (insn))
1955 	    {
1956 	      if (INSN_P (insn)
1957 		  && insn == BB_END (src)
1958 		  && JUMP_P (insn)
1959 		  && !any_condjump_p (insn))
1960 		{
1961 		  source_bb = src;
1962 		  break;
1963 		}
1964 	    }
1965 
1966 	if (source_bb)
1967 	  break;
1968       }
1969 
1970   return source_bb;
1971 }
1972 
/* Find all BB's with conditional jumps that are crossing edges;
   insert a new bb and make the conditional jump branch to the new
   bb instead (make the new bb the same color so the conditional branch
   won't be a 'crossing' edge).  Insert an unconditional jump from the
   new bb to the original destination of the conditional jump.  */
1978 
static void
fix_crossing_conditional_branches (void)
1981 {
1982   basic_block cur_bb;
1983   basic_block new_bb;
1984   basic_block dest;
1985   edge succ1;
1986   edge succ2;
1987   edge crossing_edge;
1988   edge new_edge;
1989   rtx old_jump;
1990   rtx set_src;
1991   rtx old_label = NULL_RTX;
1992   rtx new_label;
1993 
1994   FOR_EACH_BB_FN (cur_bb, cfun)
1995     {
1996       crossing_edge = NULL;
1997       if (EDGE_COUNT (cur_bb->succs) > 0)
1998 	succ1 = EDGE_SUCC (cur_bb, 0);
1999       else
2000 	succ1 = NULL;
2001 
2002       if (EDGE_COUNT (cur_bb->succs) > 1)
2003 	succ2 = EDGE_SUCC (cur_bb, 1);
2004       else
2005 	succ2 = NULL;
2006 
2007       /* We already took care of fall-through edges, so only one successor
2008 	 can be a crossing edge.  */
2009 
2010       if (succ1 && (succ1->flags & EDGE_CROSSING))
2011 	crossing_edge = succ1;
2012       else if (succ2 && (succ2->flags & EDGE_CROSSING))
2013 	crossing_edge = succ2;
2014 
2015       if (crossing_edge)
2016 	{
2017 	  old_jump = BB_END (cur_bb);
2018 
2019 	  /* Check to make sure the jump instruction is a
2020 	     conditional jump.  */
2021 
2022 	  set_src = NULL_RTX;
2023 
2024 	  if (any_condjump_p (old_jump))
2025 	    {
2026 	      if (GET_CODE (PATTERN (old_jump)) == SET)
2027 		set_src = SET_SRC (PATTERN (old_jump));
2028 	      else if (GET_CODE (PATTERN (old_jump)) == PARALLEL)
2029 		{
		  set_src = XVECEXP (PATTERN (old_jump), 0, 0);
2031 		  if (GET_CODE (set_src) == SET)
2032 		    set_src = SET_SRC (set_src);
2033 		  else
2034 		    set_src = NULL_RTX;
2035 		}
2036 	    }
2037 
2038 	  if (set_src && (GET_CODE (set_src) == IF_THEN_ELSE))
2039 	    {
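	      /* The branch target is whichever arm of the IF_THEN_ELSE
		 is not (pc).  */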
2040 	      if (GET_CODE (XEXP (set_src, 1)) == PC)
2041 		old_label = XEXP (set_src, 2);
2042 	      else if (GET_CODE (XEXP (set_src, 2)) == PC)
2043 		old_label = XEXP (set_src, 1);
2044 
2045 	      /* Check to see if new bb for jumping to that dest has
2046 		 already been created; if so, use it; if not, create
2047 		 a new one.  */
2048 
2049 	      new_bb = find_jump_block (crossing_edge->dest);
2050 
2051 	      if (new_bb)
2052 		new_label = block_label (new_bb);
2053 	      else
2054 		{
2055 		  basic_block last_bb;
2056 		  rtx new_jump;
2057 
2058 		  /* Create new basic block to be dest for
2059 		     conditional jump.  */
2060 
2061 		  /* Put appropriate instructions in new bb.  */
2062 
2063 		  new_label = gen_label_rtx ();
2064 		  emit_label (new_label);
2065 
2066 		  gcc_assert (GET_CODE (old_label) == LABEL_REF);
2067 		  old_label = JUMP_LABEL (old_jump);
2068 		  new_jump = emit_jump_insn (gen_jump (old_label));
2069 		  JUMP_LABEL (new_jump) = old_label;
2070 
2071 		  last_bb = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
2072 		  new_bb = create_basic_block (new_label, new_jump, last_bb);
2073 		  new_bb->aux = last_bb->aux;
2074 		  last_bb->aux = new_bb;
2075 
2076 		  emit_barrier_after_bb (new_bb);
2077 
2078 		  /* Make sure new bb is in same partition as source
2079 		     of conditional branch.  */
2080 		  BB_COPY_PARTITION (new_bb, cur_bb);
2081 		}
2082 
2083 	      /* Make old jump branch to new bb.  */
2084 
2085 	      redirect_jump (old_jump, new_label, 0);
2086 
2087 	      /* Remove crossing_edge as predecessor of 'dest'.  */
2088 
2089 	      dest = crossing_edge->dest;
2090 
2091 	      redirect_edge_succ (crossing_edge, new_bb);
2092 
2093 	      /* Make a new edge from new_bb to old dest; new edge
2094 		 will be a successor for new_bb and a predecessor
2095 		 for 'dest'.  */
2096 
2097 	      if (EDGE_COUNT (new_bb->succs) == 0)
2098 		new_edge = make_edge (new_bb, dest, 0);
2099 	      else
2100 		new_edge = EDGE_SUCC (new_bb, 0);
2101 
2102 	      crossing_edge->flags &= ~EDGE_CROSSING;
2103 	      new_edge->flags |= EDGE_CROSSING;
2104 	    }
2105 	}
2106     }
2107 }
2108 
2109 /* Find any unconditional branches that cross between hot and cold
2110    sections.  Convert them into indirect jumps instead.  */
2111 
static void
fix_crossing_unconditional_branches (void)
2114 {
2115   basic_block cur_bb;
2116   rtx last_insn;
2117   rtx label;
2118   rtx label_addr;
2119   rtx indirect_jump_sequence;
2120   rtx jump_insn = NULL_RTX;
2121   rtx new_reg;
2122   rtx cur_insn;
2123   edge succ;
2124 
2125   FOR_EACH_BB_FN (cur_bb, cfun)
2126     {
2127       last_insn = BB_END (cur_bb);
2128 
2129       if (EDGE_COUNT (cur_bb->succs) < 1)
2130 	continue;
2131 
2132       succ = EDGE_SUCC (cur_bb, 0);
2133 
2134       /* Check to see if bb ends in a crossing (unconditional) jump.  At
2135 	 this point, no crossing jumps should be conditional.  */
2136 
2137       if (JUMP_P (last_insn)
2138 	  && (succ->flags & EDGE_CROSSING))
2139 	{
2140 	  gcc_assert (!any_condjump_p (last_insn));
2141 
2142 	  /* Make sure the jump is not already an indirect or table jump.  */
2143 
2144 	  if (!computed_jump_p (last_insn)
2145 	      && !tablejump_p (last_insn, NULL, NULL))
2146 	    {
	      /* We have found a "crossing" unconditional branch.  Now
		 we must convert it to an indirect jump.  First create a
		 reference to the label, as the target for the jump.  */
2150 
2151 	      label = JUMP_LABEL (last_insn);
2152 	      label_addr = gen_rtx_LABEL_REF (Pmode, label);
2153 	      LABEL_NUSES (label) += 1;
2154 
2155 	      /* Get a register to use for the indirect jump.  */
2156 
2157 	      new_reg = gen_reg_rtx (Pmode);
2158 
	      /* Generate the indirect jump sequence.  */
2160 
2161 	      start_sequence ();
2162 	      emit_move_insn (new_reg, label_addr);
2163 	      emit_indirect_jump (new_reg);
2164 	      indirect_jump_sequence = get_insns ();
2165 	      end_sequence ();
2166 
2167 	      /* Make sure every instruction in the new jump sequence has
2168 		 its basic block set to be cur_bb.  */
2169 
2170 	      for (cur_insn = indirect_jump_sequence; cur_insn;
2171 		   cur_insn = NEXT_INSN (cur_insn))
2172 		{
2173 		  if (!BARRIER_P (cur_insn))
2174 		    BLOCK_FOR_INSN (cur_insn) = cur_bb;
2175 		  if (JUMP_P (cur_insn))
2176 		    jump_insn = cur_insn;
2177 		}
2178 
2179 	      /* Insert the new (indirect) jump sequence immediately before
2180 		 the unconditional jump, then delete the unconditional jump.  */
2181 
2182 	      emit_insn_before (indirect_jump_sequence, last_insn);
2183 	      delete_insn (last_insn);
2184 
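	      /* Give the new indirect jump the same JUMP_LABEL as the
		 deleted jump and account for the extra use of the label.  */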
2185 	      JUMP_LABEL (jump_insn) = label;
2186 	      LABEL_NUSES (label)++;
2187 
2188 	      /* Make BB_END for cur_bb be the jump instruction (NOT the
2189 		 barrier instruction at the end of the sequence...).  */
2190 
2191 	      BB_END (cur_bb) = jump_insn;
2192 	    }
2193 	}
2194     }
2195 }
2196 
2197 /* Add REG_CROSSING_JUMP note to all crossing jump insns.  */
2198 
static void
add_reg_crossing_jump_notes (void)
2201 {
2202   basic_block bb;
2203   edge e;
2204   edge_iterator ei;
2205 
2206   FOR_EACH_BB_FN (bb, cfun)
2207     FOR_EACH_EDGE (e, ei, bb->succs)
2208       if ((e->flags & EDGE_CROSSING)
2209 	  && JUMP_P (BB_END (e->src))
2210           /* Some notes were added during fix_up_fall_thru_edges, via
2211              force_nonfallthru_and_redirect.  */
2212           && !find_reg_note (BB_END (e->src), REG_CROSSING_JUMP, NULL_RTX))
2213 	add_reg_note (BB_END (e->src), REG_CROSSING_JUMP, NULL_RTX);
2214 }
2215 
/* Reorder basic blocks.  The main entry point to this file.  */
2218 
static void
reorder_basic_blocks (void)
2221 {
2222   int n_traces;
2223   int i;
2224   struct trace *traces;
2225 
2226   gcc_assert (current_ir_type () == IR_RTL_CFGLAYOUT);
2227 
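  /* Nothing to do if the function has at most one real basic block
     besides the fixed entry and exit blocks.  */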
2228   if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1)
2229     return;
2230 
2231   set_edge_can_fallthru_flag ();
2232   mark_dfs_back_edges ();
2233 
  /* We are estimating the length of an uncond jump insn only once since the
     code for getting the insn length always returns the minimal length now.  */
2236   if (uncond_jump_length == 0)
2237     uncond_jump_length = get_uncond_jump_length ();
2238 
2239   /* We need to know some information for each basic block.  */
2240   array_size = GET_ARRAY_SIZE (last_basic_block_for_fn (cfun));
2241   bbd = XNEWVEC (bbro_basic_block_data, array_size);
2242   for (i = 0; i < array_size; i++)
2243     {
2244       bbd[i].start_of_trace = -1;
2245       bbd[i].end_of_trace = -1;
2246       bbd[i].in_trace = -1;
2247       bbd[i].visited = 0;
2248       bbd[i].heap = NULL;
2249       bbd[i].node = NULL;
2250     }
2251 
2252   traces = XNEWVEC (struct trace, n_basic_blocks_for_fn (cfun));
2253   n_traces = 0;
2254   find_traces (&n_traces, traces);
2255   connect_traces (n_traces, traces);
2256   FREE (traces);
2257   FREE (bbd);
2258 
2259   relink_block_chain (/*stay_in_cfglayout_mode=*/true);
2260 
2261   if (dump_file)
2262     {
2263       if (dump_flags & TDF_DETAILS)
2264 	dump_reg_info (dump_file);
2265       dump_flow_info (dump_file, dump_flags);
2266     }
2267 
2268   /* Signal that rtl_verify_flow_info_1 can now verify that there
2269      is at most one switch between hot/cold sections.  */
2270   crtl->bb_reorder_complete = true;
2271 }
2272 
2273 /* Determine which partition the first basic block in the function
2274    belongs to, then find the first basic block in the current function
2275    that belongs to a different section, and insert a
2276    NOTE_INSN_SWITCH_TEXT_SECTIONS note immediately before it in the
2277    instruction stream.  When writing out the assembly code,
2278    encountering this note will make the compiler switch between the
2279    hot and cold text sections.  */
2280 
void
insert_section_boundary_note (void)
2283 {
2284   basic_block bb;
2285   bool switched_sections = false;
2286   int current_partition = 0;
2287 
2288   if (!crtl->has_bb_partition)
2289     return;
2290 
2291   FOR_EACH_BB_FN (bb, cfun)
2292     {
2293       if (!current_partition)
2294 	current_partition = BB_PARTITION (bb);
2295       if (BB_PARTITION (bb) != current_partition)
2296 	{
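	  /* Once the blocks have been reordered there should be at most one
	     transition between the hot and cold sections.  */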
2297 	  gcc_assert (!switched_sections);
2298           switched_sections = true;
2299           emit_note_before (NOTE_INSN_SWITCH_TEXT_SECTIONS, BB_HEAD (bb));
2300           current_partition = BB_PARTITION (bb);
2301 	}
2302     }
2303 }
2304 
static bool
gate_handle_reorder_blocks (void)
2307 {
2308   if (targetm.cannot_modify_jumps_p ())
2309     return false;
2310   return (optimize > 0
2311 	  && (flag_reorder_blocks || flag_reorder_blocks_and_partition));
2312 }
2313 
static unsigned int
rest_of_handle_reorder_blocks (void)
2316 {
2317   basic_block bb;
2318 
  /* Last attempt to optimize CFG, as scheduling, peepholing and insn
     splitting may have introduced more crossjumping opportunities.  */
2321   cfg_layout_initialize (CLEANUP_EXPENSIVE);
2322 
2323   reorder_basic_blocks ();
2324   cleanup_cfg (CLEANUP_EXPENSIVE);
2325 
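  /* Record the current (already reordered) block order in the ->aux chain
     so that cfg_layout_finalize preserves it.  */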
2326   FOR_EACH_BB_FN (bb, cfun)
2327     if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
2328       bb->aux = bb->next_bb;
2329   cfg_layout_finalize ();
2330 
2331   return 0;
2332 }
2333 
2334 namespace {
2335 
2336 const pass_data pass_data_reorder_blocks =
2337 {
2338   RTL_PASS, /* type */
2339   "bbro", /* name */
2340   OPTGROUP_NONE, /* optinfo_flags */
2341   true, /* has_gate */
2342   true, /* has_execute */
2343   TV_REORDER_BLOCKS, /* tv_id */
2344   0, /* properties_required */
2345   0, /* properties_provided */
2346   0, /* properties_destroyed */
2347   0, /* todo_flags_start */
2348   TODO_verify_rtl_sharing, /* todo_flags_finish */
2349 };
2350 
2351 class pass_reorder_blocks : public rtl_opt_pass
2352 {
2353 public:
  pass_reorder_blocks (gcc::context *ctxt)
2355     : rtl_opt_pass (pass_data_reorder_blocks, ctxt)
2356   {}
2357 
2358   /* opt_pass methods: */
  bool gate () { return gate_handle_reorder_blocks (); }
  unsigned int execute () { return rest_of_handle_reorder_blocks (); }
2361 
2362 }; // class pass_reorder_blocks
2363 
2364 } // anon namespace
2365 
rtl_opt_pass *
make_pass_reorder_blocks (gcc::context *ctxt)
2368 {
2369   return new pass_reorder_blocks (ctxt);
2370 }
2371 
2372 /* Duplicate the blocks containing computed gotos.  This basically unfactors
2373    computed gotos that were factored early on in the compilation process to
   speed up edge based data flow.  We used to not unfactor them again,
2375    which can seriously pessimize code with many computed jumps in the source
2376    code, such as interpreters.  See e.g. PR15242.  */
2377 
static bool
gate_duplicate_computed_gotos (void)
2380 {
2381   if (targetm.cannot_modify_jumps_p ())
2382     return false;
2383   return (optimize > 0
2384 	  && flag_expensive_optimizations
2385 	  && ! optimize_function_for_size_p (cfun));
2386 }
2387 
2388 
static unsigned int
duplicate_computed_gotos (void)
2391 {
2392   basic_block bb, new_bb;
2393   bitmap candidates;
2394   int max_size;
2395   bool changed = false;
2396 
2397   if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1)
2398     return 0;
2399 
2400   clear_bb_flags ();
2401   cfg_layout_initialize (0);
2402 
  /* We are estimating the length of an uncond jump insn only once
     since the code for getting the insn length always returns
     the minimal length now.  */
2406   if (uncond_jump_length == 0)
2407     uncond_jump_length = get_uncond_jump_length ();
2408 
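  /* A candidate block may be duplicated only if its estimated size (using
     the same insn-length estimate) does not exceed the length of
     PARAM_MAX_GOTO_DUPLICATION_INSNS unconditional jumps.  */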
2409   max_size
2410     = uncond_jump_length * PARAM_VALUE (PARAM_MAX_GOTO_DUPLICATION_INSNS);
2411   candidates = BITMAP_ALLOC (NULL);
2412 
  /* Look for blocks that end in a computed jump, and see if such blocks
     are suitable for unfactoring.  If a block is a candidate for unfactoring,
     mark it in the candidates bitmap.  */
2416   FOR_EACH_BB_FN (bb, cfun)
2417     {
2418       rtx insn;
2419       edge e;
2420       edge_iterator ei;
2421       int size, all_flags;
2422 
2423       /* Build the reorder chain for the original order of blocks.  */
2424       if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
2425 	bb->aux = bb->next_bb;
2426 
2427       /* Obviously the block has to end in a computed jump.  */
2428       if (!computed_jump_p (BB_END (bb)))
2429 	continue;
2430 
2431       /* Only consider blocks that can be duplicated.  */
2432       if (find_reg_note (BB_END (bb), REG_CROSSING_JUMP, NULL_RTX)
2433 	  || !can_duplicate_block_p (bb))
2434 	continue;
2435 
2436       /* Make sure that the block is small enough.  */
2437       size = 0;
2438       FOR_BB_INSNS (bb, insn)
2439 	if (INSN_P (insn))
2440 	  {
2441 	    size += get_attr_min_length (insn);
2442 	    if (size > max_size)
2443 	       break;
2444 	  }
2445       if (size > max_size)
2446 	continue;
2447 
2448       /* Final check: there must not be any incoming abnormal edges.  */
2449       all_flags = 0;
2450       FOR_EACH_EDGE (e, ei, bb->preds)
2451 	all_flags |= e->flags;
2452       if (all_flags & EDGE_COMPLEX)
2453 	continue;
2454 
2455       bitmap_set_bit (candidates, bb->index);
2456     }
2457 
2458   /* Nothing to do if there is no computed jump here.  */
2459   if (bitmap_empty_p (candidates))
2460     goto done;
2461 
2462   /* Duplicate computed gotos.  */
2463   FOR_EACH_BB_FN (bb, cfun)
2464     {
2465       if (bb->flags & BB_VISITED)
2466 	continue;
2467 
2468       bb->flags |= BB_VISITED;
2469 
2470       /* BB must have one outgoing edge.  That edge must not lead to
2471 	 the exit block or the next block.
2472 	 The destination must have more than one predecessor.  */
2473       if (!single_succ_p (bb)
2474 	  || single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (cfun)
2475 	  || single_succ (bb) == bb->next_bb
2476 	  || single_pred_p (single_succ (bb)))
2477 	continue;
2478 
2479       /* The successor block has to be a duplication candidate.  */
2480       if (!bitmap_bit_p (candidates, single_succ (bb)->index))
2481 	continue;
2482 
2483       /* Don't duplicate a partition crossing edge, which requires difficult
2484          fixup.  */
2485       if (find_reg_note (BB_END (bb), REG_CROSSING_JUMP, NULL_RTX))
2486 	continue;
2487 
2488       new_bb = duplicate_block (single_succ (bb), single_succ_edge (bb), bb);
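      /* Splice the copy into the ->aux reorder chain right after BB so that
	 the duplicated block becomes BB's fall-through successor.  */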
2489       new_bb->aux = bb->aux;
2490       bb->aux = new_bb;
2491       new_bb->flags |= BB_VISITED;
2492       changed = true;
2493     }
2494 
2495 done:
2496   /* Duplicating blocks above will redirect edges and may cause hot blocks
2497      previously reached by both hot and cold blocks to become dominated only
2498      by cold blocks.  */
2499   if (changed)
2500     fixup_partitions ();
2501   cfg_layout_finalize ();
2502 
2503   BITMAP_FREE (candidates);
2504   return 0;
2505 }
2506 
2507 namespace {
2508 
2509 const pass_data pass_data_duplicate_computed_gotos =
2510 {
2511   RTL_PASS, /* type */
2512   "compgotos", /* name */
2513   OPTGROUP_NONE, /* optinfo_flags */
2514   true, /* has_gate */
2515   true, /* has_execute */
2516   TV_REORDER_BLOCKS, /* tv_id */
2517   0, /* properties_required */
2518   0, /* properties_provided */
2519   0, /* properties_destroyed */
2520   0, /* todo_flags_start */
2521   TODO_verify_rtl_sharing, /* todo_flags_finish */
2522 };
2523 
2524 class pass_duplicate_computed_gotos : public rtl_opt_pass
2525 {
2526 public:
  pass_duplicate_computed_gotos (gcc::context *ctxt)
2528     : rtl_opt_pass (pass_data_duplicate_computed_gotos, ctxt)
2529   {}
2530 
2531   /* opt_pass methods: */
  bool gate () { return gate_duplicate_computed_gotos (); }
  unsigned int execute () { return duplicate_computed_gotos (); }
2534 
2535 }; // class pass_duplicate_computed_gotos
2536 
2537 } // anon namespace
2538 
rtl_opt_pass *
make_pass_duplicate_computed_gotos (gcc::context *ctxt)
2541 {
2542   return new pass_duplicate_computed_gotos (ctxt);
2543 }
2544 
static bool
gate_handle_partition_blocks (void)
2547 {
2548   /* The optimization to partition hot/cold basic blocks into separate
2549      sections of the .o file does not work well with linkonce or with
2550      user defined section attributes.  Don't call it if either case
2551      arises.  */
2552   return (flag_reorder_blocks_and_partition
2553           && optimize
2554 	  /* See gate_handle_reorder_blocks.  We should not partition if
2555 	     we are going to omit the reordering.  */
2556 	  && optimize_function_for_speed_p (cfun)
2557 	  && !DECL_ONE_ONLY (current_function_decl)
2558 	  && !user_defined_section_attribute);
2559 }
2560 
2561 /* This function is the main 'entrance' for the optimization that
2562    partitions hot and cold basic blocks into separate sections of the
2563    .o file (to improve performance and cache locality).  Ideally it
2564    would be called after all optimizations that rearrange the CFG have
2565    been called.  However part of this optimization may introduce new
2566    register usage, so it must be called before register allocation has
2567    occurred.  This means that this optimization is actually called
2568    well before the optimization that reorders basic blocks (see
2569    function above).
2570 
2571    This optimization checks the feedback information to determine
2572    which basic blocks are hot/cold, updates flags on the basic blocks
2573    to indicate which section they belong in.  This information is
2574    later used for writing out sections in the .o file.  Because hot
2575    and cold sections can be arbitrarily large (within the bounds of
2576    memory), far beyond the size of a single function, it is necessary
2577    to fix up all edges that cross section boundaries, to make sure the
2578    instructions used can actually span the required distance.  The
2579    fixes are described below.
2580 
2581    Fall-through edges must be changed into jumps; it is not safe or
2582    legal to fall through across a section boundary.  Whenever a
2583    fall-through edge crossing a section boundary is encountered, a new
2584    basic block is inserted (in the same section as the fall-through
2585    source), and the fall through edge is redirected to the new basic
2586    block.  The new basic block contains an unconditional jump to the
2587    original fall-through target.  (If the unconditional jump is
2588    insufficient to cross section boundaries, that is dealt with a
2589    little later, see below).
2590 
2591    In order to deal with architectures that have short conditional
2592    branches (which cannot span all of memory) we take any conditional
2593    jump that attempts to cross a section boundary and add a level of
2594    indirection: it becomes a conditional jump to a new basic block, in
2595    the same section.  The new basic block contains an unconditional
2596    jump to the original target, in the other section.
2597 
2598    For those architectures whose unconditional branch is also
2599    incapable of reaching all of memory, those unconditional jumps are
2600    converted into indirect jumps, through a register.
2601 
2602    IMPORTANT NOTE: This optimization causes some messy interactions
2603    with the cfg cleanup optimizations; those optimizations want to
2604    merge blocks wherever possible, and to collapse indirect jump
2605    sequences (change "A jumps to B jumps to C" directly into "A jumps
2606    to C").  Those optimizations can undo the jump fixes that
2607    partitioning is required to make (see above), in order to ensure
2608    that jumps attempting to cross section boundaries are really able
2609    to cover whatever distance the jump requires (on many architectures
2610    conditional or unconditional jumps are not able to reach all of
2611    memory).  Therefore tests have to be inserted into each such
2612    optimization to make sure that it does not undo stuff necessary to
2613    cross partition boundaries.  This would be much less of a problem
2614    if we could perform this optimization later in the compilation, but
2615    unfortunately the fact that we may need to create indirect jumps
2616    (through registers) requires that this optimization be performed
2617    before register allocation.
2618 
2619    Hot and cold basic blocks are partitioned and put in separate
2620    sections of the .o file, to reduce paging and improve cache
   performance (hopefully).  This can result in bits of code from the
   same function being widely separated in the .o file.  However, the current
   bb structure does not reflect that separation.  Therefore we must take
2624    care to ensure that: 1). There are no fall_thru edges that cross
2625    between sections; 2). For those architectures which have "short"
2626    conditional branches, all conditional branches that attempt to
2627    cross between sections are converted to unconditional branches;
2628    and, 3). For those architectures which have "short" unconditional
2629    branches, all unconditional branches that attempt to cross between
2630    sections are converted to indirect jumps.
2631 
2632    The code for fixing up fall_thru edges that cross between hot and
2633    cold basic blocks does so by creating new basic blocks containing
2634    unconditional branches to the appropriate label in the "other"
2635    section.  The new basic block is then put in the same (hot or cold)
2636    section as the original conditional branch, and the fall_thru edge
2637    is modified to fall into the new basic block instead.  By adding
2638    this level of indirection we end up with only unconditional branches
2639    crossing between hot and cold sections.
2640 
2641    Conditional branches are dealt with by adding a level of indirection.
2642    A new basic block is added in the same (hot/cold) section as the
2643    conditional branch, and the conditional branch is retargeted to the
2644    new basic block.  The new basic block contains an unconditional branch
2645    to the original target of the conditional branch (in the other section).
2646 
2647    Unconditional branches are dealt with by converting them into
2648    indirect jumps.  */
2649 
static unsigned
partition_hot_cold_basic_blocks (void)
2652 {
2653   vec<edge> crossing_edges;
2654 
2655   if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1)
2656     return 0;
2657 
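  /* Defer DF insn rescanning; the edge fixups below create and modify
     a number of insns.  */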
2658   df_set_flags (DF_DEFER_INSN_RESCAN);
2659 
2660   crossing_edges = find_rarely_executed_basic_blocks_and_crossing_edges ();
2661   if (!crossing_edges.exists ())
2662     return 0;
2663 
2664   crtl->has_bb_partition = true;
2665 
2666   /* Make sure the source of any crossing edge ends in a jump and the
2667      destination of any crossing edge has a label.  */
2668   add_labels_and_missing_jumps (crossing_edges);
2669 
  /* Convert all crossing fall_thru edges into non-crossing unconditional
     jumps (that jump to the original fall-through dest).  */
2673   fix_up_fall_thru_edges ();
2674 
2675   /* If the architecture does not have conditional branches that can
2676      span all of memory, convert crossing conditional branches into
2677      crossing unconditional branches.  */
2678   if (!HAS_LONG_COND_BRANCH)
2679     fix_crossing_conditional_branches ();
2680 
2681   /* If the architecture does not have unconditional branches that
2682      can span all of memory, convert crossing unconditional branches
2683      into indirect jumps.  Since adding an indirect jump also adds
2684      a new register usage, update the register usage information as
2685      well.  */
2686   if (!HAS_LONG_UNCOND_BRANCH)
2687     fix_crossing_unconditional_branches ();
2688 
2689   add_reg_crossing_jump_notes ();
2690 
2691   /* Clear bb->aux fields that the above routines were using.  */
2692   clear_aux_for_blocks ();
2693 
2694   crossing_edges.release ();
2695 
2696   /* ??? FIXME: DF generates the bb info for a block immediately.
2697      And by immediately, I mean *during* creation of the block.
2698 
2699 	#0  df_bb_refs_collect
2700 	#1  in df_bb_refs_record
2701 	#2  in create_basic_block_structure
2702 
2703      Which means that the bb_has_eh_pred test in df_bb_refs_collect
2704      will *always* fail, because no edges can have been added to the
2705      block yet.  Which of course means we don't add the right
2706      artificial refs, which means we fail df_verify (much) later.
2707 
2708      Cleanest solution would seem to make DF_DEFER_INSN_RESCAN imply
2709      that we also shouldn't grab data from the new blocks those new
2710      insns are in either.  In this way one can create the block, link
2711      it up properly, and have everything Just Work later, when deferred
2712      insns are processed.
2713 
2714      In the meantime, we have no other option but to throw away all
2715      of the DF data and recompute it all.  */
2716   if (cfun->eh->lp_array)
2717     {
2718       df_finish_pass (true);
2719       df_scan_alloc (NULL);
2720       df_scan_blocks ();
2721       /* Not all post-landing pads use all of the EH_RETURN_DATA_REGNO
2722 	 data.  We blindly generated all of them when creating the new
2723 	 landing pad.  Delete those assignments we don't use.  */
2724       df_set_flags (DF_LR_RUN_DCE);
2725       df_analyze ();
2726     }
2727 
2728   return TODO_verify_flow | TODO_verify_rtl_sharing;
2729 }
2730 
2731 namespace {
2732 
2733 const pass_data pass_data_partition_blocks =
2734 {
2735   RTL_PASS, /* type */
2736   "bbpart", /* name */
2737   OPTGROUP_NONE, /* optinfo_flags */
2738   true, /* has_gate */
2739   true, /* has_execute */
2740   TV_REORDER_BLOCKS, /* tv_id */
2741   PROP_cfglayout, /* properties_required */
2742   0, /* properties_provided */
2743   0, /* properties_destroyed */
2744   0, /* todo_flags_start */
2745   0, /* todo_flags_finish */
2746 };
2747 
2748 class pass_partition_blocks : public rtl_opt_pass
2749 {
2750 public:
  pass_partition_blocks (gcc::context *ctxt)
2752     : rtl_opt_pass (pass_data_partition_blocks, ctxt)
2753   {}
2754 
2755   /* opt_pass methods: */
  bool gate () { return gate_handle_partition_blocks (); }
  unsigned int execute () { return partition_hot_cold_basic_blocks (); }
2758 
2759 }; // class pass_partition_blocks
2760 
2761 } // anon namespace
2762 
rtl_opt_pass *
make_pass_partition_blocks (gcc::context *ctxt)
2765 {
2766   return new pass_partition_blocks (ctxt);
2767 }
2768