1 /* Shrink-wrapping related optimizations.
2    Copyright (C) 1987-2018 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 /* This file handles shrink-wrapping related optimizations.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "cfghooks.h"
30 #include "df.h"
31 #include "memmodel.h"
32 #include "tm_p.h"
33 #include "regs.h"
34 #include "insn-config.h"
35 #include "emit-rtl.h"
36 #include "output.h"
37 #include "tree-pass.h"
38 #include "cfgrtl.h"
39 #include "cfgbuild.h"
40 #include "params.h"
41 #include "bb-reorder.h"
42 #include "shrink-wrap.h"
43 #include "regcprop.h"
44 #include "rtl-iter.h"
45 #include "valtrack.h"
46 
47 
48 /* Return true if INSN requires the stack frame to be set up.
49    PROLOGUE_USED contains the hard registers used in the function
50    prologue.  SET_UP_BY_PROLOGUE is the set of registers we expect the
51    prologue to set up for the function.  */
52 bool
requires_stack_frame_p(rtx_insn * insn,HARD_REG_SET prologue_used,HARD_REG_SET set_up_by_prologue)53 requires_stack_frame_p (rtx_insn *insn, HARD_REG_SET prologue_used,
54 			HARD_REG_SET set_up_by_prologue)
55 {
56   df_ref def, use;
57   HARD_REG_SET hardregs;
58   unsigned regno;
59 
60   if (CALL_P (insn))
61     return !SIBLING_CALL_P (insn);
62 
63   /* We need a frame to get the unique CFA expected by the unwinder.  */
64   if (cfun->can_throw_non_call_exceptions && can_throw_internal (insn))
65     return true;
66 
67   CLEAR_HARD_REG_SET (hardregs);
68   FOR_EACH_INSN_DEF (def, insn)
69     {
70       rtx dreg = DF_REF_REG (def);
71 
72       if (!REG_P (dreg))
73 	continue;
74 
75       add_to_hard_reg_set (&hardregs, GET_MODE (dreg), REGNO (dreg));
76     }
77   if (hard_reg_set_intersect_p (hardregs, prologue_used))
78     return true;
79   AND_COMPL_HARD_REG_SET (hardregs, call_used_reg_set);
80   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
81     if (TEST_HARD_REG_BIT (hardregs, regno)
82 	&& df_regs_ever_live_p (regno))
83       return true;
84 
85   FOR_EACH_INSN_USE (use, insn)
86     {
87       rtx reg = DF_REF_REG (use);
88 
89       if (!REG_P (reg))
90 	continue;
91 
92       add_to_hard_reg_set (&hardregs, GET_MODE (reg),
93 			   REGNO (reg));
94     }
95   if (hard_reg_set_intersect_p (hardregs, set_up_by_prologue))
96     return true;
97 
98   return false;
99 }
100 
101 /* See whether there has a single live edge from BB, which dest uses
102    [REGNO, END_REGNO).  Return the live edge if its dest bb has
103    one or two predecessors.  Otherwise return NULL.  */
104 
105 static edge
live_edge_for_reg(basic_block bb,int regno,int end_regno)106 live_edge_for_reg (basic_block bb, int regno, int end_regno)
107 {
108   edge e, live_edge;
109   edge_iterator ei;
110   bitmap live;
111   int i;
112 
113   live_edge = NULL;
114   FOR_EACH_EDGE (e, ei, bb->succs)
115     {
116       live = df_get_live_in (e->dest);
117       for (i = regno; i < end_regno; i++)
118 	if (REGNO_REG_SET_P (live, i))
119 	  {
120 	    if (live_edge && live_edge != e)
121 	      return NULL;
122 	    live_edge = e;
123 	  }
124     }
125 
126   /* We can sometimes encounter dead code.  Don't try to move it
127      into the exit block.  */
128   if (!live_edge || live_edge->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
129     return NULL;
130 
131   /* Reject targets of abnormal edges.  This is needed for correctness
132      on ports like Alpha and MIPS, whose pic_offset_table_rtx can die on
133      exception edges even though it is generally treated as call-saved
134      for the majority of the compilation.  Moving across abnormal edges
135      isn't going to be interesting for shrink-wrap usage anyway.  */
136   if (live_edge->flags & EDGE_ABNORMAL)
137     return NULL;
138 
139   /* When live_edge->dest->preds == 2, we can create a new block on
140      the edge to make it meet the requirement.  */
141   if (EDGE_COUNT (live_edge->dest->preds) > 2)
142     return NULL;
143 
144   return live_edge;
145 }
146 
147 /* Try to move INSN from BB to a successor.  Return true on success.
148    USES and DEFS are the set of registers that are used and defined
149    after INSN in BB.  SPLIT_P indicates whether a live edge from BB
150    is splitted or not.  */
151 
152 static bool
move_insn_for_shrink_wrap(basic_block bb,rtx_insn * insn,const HARD_REG_SET uses,const HARD_REG_SET defs,bool * split_p,struct dead_debug_local * debug)153 move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn,
154 			   const HARD_REG_SET uses,
155 			   const HARD_REG_SET defs,
156 			   bool *split_p,
157 			   struct dead_debug_local *debug)
158 {
159   rtx set, src, dest;
160   bitmap live_out, live_in, bb_uses = NULL, bb_defs = NULL;
161   unsigned int i, dregno, end_dregno;
162   unsigned int sregno = FIRST_PSEUDO_REGISTER;
163   unsigned int end_sregno = FIRST_PSEUDO_REGISTER;
164   basic_block next_block;
165   edge live_edge;
166   rtx_insn *dinsn;
167   df_ref def;
168 
169   /* Look for a simple register assignment.  We don't use single_set here
170      because we can't deal with any CLOBBERs, USEs, or REG_UNUSED secondary
171      destinations.  */
172   if (!INSN_P (insn))
173     return false;
174   set = PATTERN (insn);
175   if (GET_CODE (set) != SET)
176     return false;
177   src = SET_SRC (set);
178   dest = SET_DEST (set);
179 
180   /* For the destination, we want only a register.  Also disallow STACK
181      or FRAME related adjustments.  They are likely part of the prologue,
182      so keep them in the entry block.  */
183   if (!REG_P (dest)
184       || dest == stack_pointer_rtx
185       || dest == frame_pointer_rtx
186       || dest == hard_frame_pointer_rtx)
187     return false;
188 
189   /* For the source, we want one of:
190       (1) A (non-overlapping) register
191       (2) A constant,
192       (3) An expression involving no more than one register.
193 
194      That last point comes from the code following, which was originally
195      written to handle only register move operations, and still only handles
196      a single source register when checking for overlaps.  Happily, the
197      same checks can be applied to expressions like (plus reg const).  */
198 
199   if (CONSTANT_P (src))
200     ;
201   else if (!REG_P (src))
202     {
203       rtx src_inner = NULL_RTX;
204 
205       if (can_throw_internal (insn))
206 	return false;
207 
208       subrtx_var_iterator::array_type array;
209       FOR_EACH_SUBRTX_VAR (iter, array, src, ALL)
210 	{
211 	  rtx x = *iter;
212 	  switch (GET_RTX_CLASS (GET_CODE (x)))
213 	    {
214 	    case RTX_CONST_OBJ:
215 	    case RTX_COMPARE:
216 	    case RTX_COMM_COMPARE:
217 	    case RTX_BIN_ARITH:
218 	    case RTX_COMM_ARITH:
219 	    case RTX_UNARY:
220 	    case RTX_TERNARY:
221 	      /* Constant or expression.  Continue.  */
222 	      break;
223 
224 	    case RTX_OBJ:
225 	    case RTX_EXTRA:
226 	      switch (GET_CODE (x))
227 		{
228 		case UNSPEC:
229 		case SUBREG:
230 		case STRICT_LOW_PART:
231 		case PC:
232 		case LO_SUM:
233 		  /* Ok.  Continue.  */
234 		  break;
235 
236 		case REG:
237 		  /* Fail if we see a second inner register.  */
238 		  if (src_inner != NULL)
239 		    return false;
240 		  src_inner = x;
241 		  break;
242 
243 		default:
244 		  return false;
245 		}
246 	      break;
247 
248 	    default:
249 	      return false;
250 	    }
251 	}
252 
253       if (src_inner != NULL)
254 	src = src_inner;
255     }
256 
257   /* Make sure that the source register isn't defined later in BB.  */
258   if (REG_P (src))
259     {
260       sregno = REGNO (src);
261       end_sregno = END_REGNO (src);
262       if (overlaps_hard_reg_set_p (defs, GET_MODE (src), sregno))
263 	return false;
264     }
265 
266   /* Make sure that the destination register isn't referenced later in BB.  */
267   dregno = REGNO (dest);
268   end_dregno = END_REGNO (dest);
269   if (overlaps_hard_reg_set_p (uses, GET_MODE (dest), dregno)
270       || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
271     return false;
272 
273   /* See whether there is a successor block to which we could move INSN.  */
274   live_edge = live_edge_for_reg (bb, dregno, end_dregno);
275   if (!live_edge)
276     return false;
277 
278   next_block = live_edge->dest;
279   /* Create a new basic block on the edge.  */
280   if (EDGE_COUNT (next_block->preds) == 2)
281     {
282       /* split_edge for a block with only one successor is meaningless.  */
283       if (EDGE_COUNT (bb->succs) == 1)
284 	return false;
285 
286       /* If DF_LIVE doesn't exist, i.e. at -O1, just give up.  */
287       if (!df_live)
288 	return false;
289 
290       basic_block old_dest = live_edge->dest;
291       next_block = split_edge (live_edge);
292 
293       /* We create a new basic block.  Call df_grow_bb_info to make sure
294 	 all data structures are allocated.  */
295       df_grow_bb_info (df_live);
296 
297       bitmap_and (df_get_live_in (next_block), df_get_live_out (bb),
298 		  df_get_live_in (old_dest));
299       df_set_bb_dirty (next_block);
300 
301       /* We should not split more than once for a function.  */
302       if (*split_p)
303 	return false;
304 
305       *split_p = true;
306     }
307 
308   /* At this point we are committed to moving INSN, but let's try to
309      move it as far as we can.  */
310   do
311     {
312       if (MAY_HAVE_DEBUG_BIND_INSNS)
313 	{
314 	  FOR_BB_INSNS_REVERSE (bb, dinsn)
315 	    if (DEBUG_BIND_INSN_P (dinsn))
316 	      {
317 		df_ref use;
318 		FOR_EACH_INSN_USE (use, dinsn)
319 		  if (refers_to_regno_p (dregno, end_dregno,
320 					 DF_REF_REG (use), (rtx *) NULL))
321 		    dead_debug_add (debug, use, DF_REF_REGNO (use));
322 	      }
323 	    else if (dinsn == insn)
324 	      break;
325 	}
326       live_out = df_get_live_out (bb);
327       live_in = df_get_live_in (next_block);
328       bb = next_block;
329 
330       /* Check whether BB uses DEST or clobbers DEST.  We need to add
331 	 INSN to BB if so.  Either way, DEST is no longer live on entry,
332 	 except for any part that overlaps SRC (next loop).  */
333       if (!*split_p)
334 	{
335 	  bb_uses = &DF_LR_BB_INFO (bb)->use;
336 	  bb_defs = &DF_LR_BB_INFO (bb)->def;
337 	}
338       if (df_live)
339 	{
340 	  for (i = dregno; i < end_dregno; i++)
341 	    {
342 	      if (*split_p
343 		  || REGNO_REG_SET_P (bb_uses, i)
344 		  || REGNO_REG_SET_P (bb_defs, i)
345 		  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
346 		next_block = NULL;
347 	      CLEAR_REGNO_REG_SET (live_out, i);
348 	      CLEAR_REGNO_REG_SET (live_in, i);
349 	    }
350 
351 	  /* Check whether BB clobbers SRC.  We need to add INSN to BB if so.
352 	     Either way, SRC is now live on entry.  */
353 	  for (i = sregno; i < end_sregno; i++)
354 	    {
355 	      if (*split_p
356 		  || REGNO_REG_SET_P (bb_defs, i)
357 		  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
358 		next_block = NULL;
359 	      SET_REGNO_REG_SET (live_out, i);
360 	      SET_REGNO_REG_SET (live_in, i);
361 	    }
362 	}
363       else
364 	{
365 	  /* DF_LR_BB_INFO (bb)->def does not comprise the DF_REF_PARTIAL and
366 	     DF_REF_CONDITIONAL defs.  So if DF_LIVE doesn't exist, i.e.
367 	     at -O1, just give up searching NEXT_BLOCK.  */
368 	  next_block = NULL;
369 	  for (i = dregno; i < end_dregno; i++)
370 	    {
371 	      CLEAR_REGNO_REG_SET (live_out, i);
372 	      CLEAR_REGNO_REG_SET (live_in, i);
373 	    }
374 
375 	  for (i = sregno; i < end_sregno; i++)
376 	    {
377 	      SET_REGNO_REG_SET (live_out, i);
378 	      SET_REGNO_REG_SET (live_in, i);
379 	    }
380 	}
381 
382       /* If we don't need to add the move to BB, look for a single
383 	 successor block.  */
384       if (next_block)
385 	{
386 	  live_edge = live_edge_for_reg (next_block, dregno, end_dregno);
387 	  if (!live_edge || EDGE_COUNT (live_edge->dest->preds) > 1)
388 	    break;
389 	  next_block = live_edge->dest;
390 	}
391     }
392   while (next_block);
393 
394   /* For the new created basic block, there is no dataflow info at all.
395      So skip the following dataflow update and check.  */
396   if (!(*split_p))
397     {
398       /* BB now defines DEST.  It only uses the parts of DEST that overlap SRC
399 	 (next loop).  */
400       for (i = dregno; i < end_dregno; i++)
401 	{
402 	  CLEAR_REGNO_REG_SET (bb_uses, i);
403 	  SET_REGNO_REG_SET (bb_defs, i);
404 	}
405 
406       /* BB now uses SRC.  */
407       for (i = sregno; i < end_sregno; i++)
408 	SET_REGNO_REG_SET (bb_uses, i);
409     }
410 
411   /* Insert debug temps for dead REGs used in subsequent debug insns.  */
412   if (debug->used && !bitmap_empty_p (debug->used))
413     FOR_EACH_INSN_DEF (def, insn)
414       dead_debug_insert_temp (debug, DF_REF_REGNO (def), insn,
415 			      DEBUG_TEMP_BEFORE_WITH_VALUE);
416 
417   emit_insn_after (PATTERN (insn), bb_note (bb));
418   delete_insn (insn);
419   return true;
420 }
421 
422 /* Look for register copies in the first block of the function, and move
423    them down into successor blocks if the register is used only on one
424    path.  This exposes more opportunities for shrink-wrapping.  These
425    kinds of sets often occur when incoming argument registers are moved
426    to call-saved registers because their values are live across one or
427    more calls during the function.  */
428 
429 static void
prepare_shrink_wrap(basic_block entry_block)430 prepare_shrink_wrap (basic_block entry_block)
431 {
432   rtx_insn *insn, *curr;
433   rtx x;
434   HARD_REG_SET uses, defs;
435   df_ref def, use;
436   bool split_p = false;
437   unsigned int i;
438   struct dead_debug_local debug;
439 
440   if (JUMP_P (BB_END (entry_block)))
441     {
442       /* To have more shrink-wrapping opportunities, prepare_shrink_wrap tries
443 	 to sink the copies from parameter to callee saved register out of
444 	 entry block.  copyprop_hardreg_forward_bb_without_debug_insn is called
445 	 to release some dependences.  */
446       copyprop_hardreg_forward_bb_without_debug_insn (entry_block);
447     }
448 
449   dead_debug_local_init (&debug, NULL, NULL);
450   CLEAR_HARD_REG_SET (uses);
451   CLEAR_HARD_REG_SET (defs);
452 
453   FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr)
454     if (NONDEBUG_INSN_P (insn)
455 	&& !move_insn_for_shrink_wrap (entry_block, insn, uses, defs,
456 				       &split_p, &debug))
457       {
458 	/* Add all defined registers to DEFs.  */
459 	FOR_EACH_INSN_DEF (def, insn)
460 	  {
461 	    x = DF_REF_REG (def);
462 	    if (REG_P (x) && HARD_REGISTER_P (x))
463 	      for (i = REGNO (x); i < END_REGNO (x); i++)
464 		SET_HARD_REG_BIT (defs, i);
465 	  }
466 
467 	/* Add all used registers to USESs.  */
468 	FOR_EACH_INSN_USE (use, insn)
469 	  {
470 	    x = DF_REF_REG (use);
471 	    if (REG_P (x) && HARD_REGISTER_P (x))
472 	      for (i = REGNO (x); i < END_REGNO (x); i++)
473 		SET_HARD_REG_BIT (uses, i);
474 	  }
475       }
476 
477   dead_debug_local_finish (&debug, NULL);
478 }
479 
480 /* Return whether basic block PRO can get the prologue.  It can not if it
481    has incoming complex edges that need a prologue inserted (we make a new
482    block for the prologue, so those edges would need to be redirected, which
483    does not work).  It also can not if there exist registers live on entry
484    to PRO that are clobbered by the prologue.  */
485 
486 static bool
can_get_prologue(basic_block pro,HARD_REG_SET prologue_clobbered)487 can_get_prologue (basic_block pro, HARD_REG_SET prologue_clobbered)
488 {
489   edge e;
490   edge_iterator ei;
491   FOR_EACH_EDGE (e, ei, pro->preds)
492     if (e->flags & (EDGE_COMPLEX | EDGE_CROSSING)
493 	&& !dominated_by_p (CDI_DOMINATORS, e->src, pro))
494       return false;
495 
496   HARD_REG_SET live;
497   REG_SET_TO_HARD_REG_SET (live, df_get_live_in (pro));
498   if (hard_reg_set_intersect_p (live, prologue_clobbered))
499     return false;
500 
501   return true;
502 }
503 
504 /* Return whether we can duplicate basic block BB for shrink wrapping.  We
505    cannot if the block cannot be duplicated at all, or if any of its incoming
506    edges are complex and come from a block that does not require a prologue
507    (we cannot redirect such edges), or if the block is too big to copy.
508    PRO is the basic block before which we would put the prologue, MAX_SIZE is
509    the maximum size block we allow to be copied.  */
510 
511 static bool
can_dup_for_shrink_wrapping(basic_block bb,basic_block pro,unsigned max_size)512 can_dup_for_shrink_wrapping (basic_block bb, basic_block pro, unsigned max_size)
513 {
514   if (!can_duplicate_block_p (bb))
515     return false;
516 
517   edge e;
518   edge_iterator ei;
519   FOR_EACH_EDGE (e, ei, bb->preds)
520     if (e->flags & (EDGE_COMPLEX | EDGE_CROSSING)
521 	&& !dominated_by_p (CDI_DOMINATORS, e->src, pro))
522       return false;
523 
524   unsigned size = 0;
525 
526   rtx_insn *insn;
527   FOR_BB_INSNS (bb, insn)
528     if (NONDEBUG_INSN_P (insn))
529       {
530 	size += get_attr_min_length (insn);
531 	if (size > max_size)
532 	  return false;
533       }
534 
535   return true;
536 }
537 
538 /* Do whatever needs to be done for exits that run without prologue.
539    Sibcalls need nothing done.  Normal exits get a simple_return inserted.  */
540 
541 static void
handle_simple_exit(edge e)542 handle_simple_exit (edge e)
543 {
544 
545   if (e->flags & EDGE_SIBCALL)
546     {
547       /* Tell function.c to take no further action on this edge.  */
548       e->flags |= EDGE_IGNORE;
549 
550       e->flags &= ~EDGE_FALLTHRU;
551       emit_barrier_after_bb (e->src);
552       return;
553     }
554 
555   /* If the basic block the edge comes from has multiple successors,
556      split the edge.  */
557   if (EDGE_COUNT (e->src->succs) > 1)
558     {
559       basic_block old_bb = e->src;
560       rtx_insn *end = BB_END (old_bb);
561       rtx_note *note = emit_note_after (NOTE_INSN_DELETED, end);
562       basic_block new_bb = create_basic_block (note, note, old_bb);
563       BB_COPY_PARTITION (new_bb, old_bb);
564       BB_END (old_bb) = end;
565 
566       redirect_edge_succ (e, new_bb);
567       new_bb->count = e->count ();
568       e->flags |= EDGE_FALLTHRU;
569 
570       e = make_single_succ_edge (new_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
571     }
572 
573   e->flags &= ~EDGE_FALLTHRU;
574   rtx_jump_insn *ret = emit_jump_insn_after (targetm.gen_simple_return (),
575 					     BB_END (e->src));
576   JUMP_LABEL (ret) = simple_return_rtx;
577   emit_barrier_after_bb (e->src);
578 
579   if (dump_file)
580     fprintf (dump_file, "Made simple_return with UID %d in bb %d\n",
581 	     INSN_UID (ret), e->src->index);
582 }
583 
584 /* Try to perform a kind of shrink-wrapping, making sure the
585    prologue/epilogue is emitted only around those parts of the
586    function that require it.
587 
588    There will be exactly one prologue, and it will be executed either
589    zero or one time, on any path.  Depending on where the prologue is
590    placed, some of the basic blocks can be reached via both paths with
591    and without a prologue.  Such blocks will be duplicated here, and the
592    edges changed to match.
593 
594    Paths that go to the exit without going through the prologue will use
595    a simple_return instead of the epilogue.  We maximize the number of
596    those, making sure to only duplicate blocks that can be duplicated.
597    If the prologue can then still be placed in multiple locations, we
598    place it as early as possible.
599 
600    An example, where we duplicate blocks with control flow (legend:
601    _B_egin, _R_eturn and _S_imple_return; edges without arrowhead should
602    be taken to point down or to the right, to simplify the diagram; here,
603    block 3 needs a prologue, the rest does not):
604 
605 
606        B                 B
607        |                 |
608        2                 2
609        |\                |\
610        | 3    becomes    | 3
611        |/                |  \
612        4                 7   4
613        |\                |\  |\
614        | 5               | 8 | 5
615        |/                |/  |/
616        6                 9   6
617        |                 |   |
618        R                 S   R
619 
620 
621    (bb 4 is duplicated to 7, and so on; the prologue is inserted on the
622    edge 2->3).
623 
624    Another example, where part of a loop is duplicated (again, bb 3 is
625    the only block that needs a prologue):
626 
627 
628        B   3<--              B       ->3<--
629        |   |   |             |      |  |   |
630        |   v   |   becomes   |      |  v   |
631        2---4---              2---5--   4---
632            |                     |     |
633            R                     S     R
634 
635 
636    (bb 4 is duplicated to 5; the prologue is inserted on the edge 5->3).
637 
638    ENTRY_EDGE is the edge where the prologue will be placed, possibly
639    changed by this function.  PROLOGUE_SEQ is the prologue we will insert.  */
640 
641 void
try_shrink_wrapping(edge * entry_edge,rtx_insn * prologue_seq)642 try_shrink_wrapping (edge *entry_edge, rtx_insn *prologue_seq)
643 {
644   /* If we cannot shrink-wrap, are told not to shrink-wrap, or it makes
645      no sense to shrink-wrap: then do not shrink-wrap!  */
646 
647   if (!SHRINK_WRAPPING_ENABLED)
648     return;
649 
650   if (crtl->profile && !targetm.profile_before_prologue ())
651     return;
652 
653   if (crtl->calls_eh_return)
654     return;
655 
656   bool empty_prologue = true;
657   for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn))
658     if (!(NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_PROLOGUE_END))
659       {
660 	empty_prologue = false;
661 	break;
662       }
663   if (empty_prologue)
664     return;
665 
666   /* Move some code down to expose more shrink-wrapping opportunities.  */
667 
668   basic_block entry = (*entry_edge)->dest;
669   prepare_shrink_wrap (entry);
670 
671   if (dump_file)
672     fprintf (dump_file, "Attempting shrink-wrapping optimization.\n");
673 
674   /* Compute the registers set and used in the prologue.  */
675 
676   HARD_REG_SET prologue_clobbered, prologue_used;
677   CLEAR_HARD_REG_SET (prologue_clobbered);
678   CLEAR_HARD_REG_SET (prologue_used);
679   for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn))
680     if (NONDEBUG_INSN_P (insn))
681       {
682 	HARD_REG_SET this_used;
683 	CLEAR_HARD_REG_SET (this_used);
684 	note_uses (&PATTERN (insn), record_hard_reg_uses, &this_used);
685 	AND_COMPL_HARD_REG_SET (this_used, prologue_clobbered);
686 	IOR_HARD_REG_SET (prologue_used, this_used);
687 	note_stores (PATTERN (insn), record_hard_reg_sets, &prologue_clobbered);
688       }
689   CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM);
690   if (frame_pointer_needed)
691     CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM);
692 
693   /* Find out what registers are set up by the prologue; any use of these
694      cannot happen before the prologue.  */
695 
696   struct hard_reg_set_container set_up_by_prologue;
697   CLEAR_HARD_REG_SET (set_up_by_prologue.set);
698   add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, STACK_POINTER_REGNUM);
699   add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, ARG_POINTER_REGNUM);
700   if (frame_pointer_needed)
701     add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
702 			 HARD_FRAME_POINTER_REGNUM);
703   if (pic_offset_table_rtx
704       && (unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
705     add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
706 			 PIC_OFFSET_TABLE_REGNUM);
707   if (crtl->drap_reg)
708     add_to_hard_reg_set (&set_up_by_prologue.set,
709 			 GET_MODE (crtl->drap_reg),
710 			 REGNO (crtl->drap_reg));
711   if (targetm.set_up_by_prologue)
712     targetm.set_up_by_prologue (&set_up_by_prologue);
713 
714   /* We will insert the prologue before the basic block PRO.  PRO should
715      dominate all basic blocks that need the prologue to be executed
716      before them.  First, make PRO the "tightest wrap" possible.  */
717 
718   calculate_dominance_info (CDI_DOMINATORS);
719 
720   basic_block pro = 0;
721 
722   basic_block bb;
723   edge e;
724   edge_iterator ei;
725   FOR_EACH_BB_FN (bb, cfun)
726     {
727       rtx_insn *insn;
728       FOR_BB_INSNS (bb, insn)
729 	if (NONDEBUG_INSN_P (insn)
730 	    && requires_stack_frame_p (insn, prologue_used,
731 				       set_up_by_prologue.set))
732 	  {
733 	    if (dump_file)
734 	      fprintf (dump_file, "Block %d needs the prologue.\n", bb->index);
735 	    pro = nearest_common_dominator (CDI_DOMINATORS, pro, bb);
736 	    break;
737 	  }
738     }
739 
740   /* If nothing needs a prologue, just put it at the start.  This really
741      shouldn't happen, but we cannot fix it here.  */
742 
743   if (pro == 0)
744     {
745       if (dump_file)
746 	fprintf(dump_file, "Nothing needs a prologue, but it isn't empty; "
747 			   "putting it at the start.\n");
748       pro = entry;
749     }
750 
751   if (dump_file)
752     fprintf (dump_file, "After wrapping required blocks, PRO is now %d\n",
753 	     pro->index);
754 
755   /* Now see if we can put the prologue at the start of PRO.  Putting it
756      there might require duplicating a block that cannot be duplicated,
757      or in some cases we cannot insert the prologue there at all.  If PRO
758      wont't do, try again with the immediate dominator of PRO, and so on.
759 
760      The blocks that need duplicating are those reachable from PRO but
761      not dominated by it.  We keep in BB_WITH a bitmap of the blocks
762      reachable from PRO that we already found, and in VEC a stack of
763      those we still need to consider (to find successors).  */
764 
765   auto_bitmap bb_with;
766   bitmap_set_bit (bb_with, pro->index);
767 
768   vec<basic_block> vec;
769   vec.create (n_basic_blocks_for_fn (cfun));
770   vec.quick_push (pro);
771 
772   unsigned max_grow_size = get_uncond_jump_length ();
773   max_grow_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);
774 
775   while (!vec.is_empty () && pro != entry)
776     {
777       while (pro != entry && !can_get_prologue (pro, prologue_clobbered))
778 	{
779 	  pro = get_immediate_dominator (CDI_DOMINATORS, pro);
780 
781 	  if (bitmap_set_bit (bb_with, pro->index))
782 	    vec.quick_push (pro);
783 	}
784 
785       basic_block bb = vec.pop ();
786       if (!can_dup_for_shrink_wrapping (bb, pro, max_grow_size))
787 	while (!dominated_by_p (CDI_DOMINATORS, bb, pro))
788 	  {
789 	    gcc_assert (pro != entry);
790 
791 	    pro = get_immediate_dominator (CDI_DOMINATORS, pro);
792 
793 	    if (bitmap_set_bit (bb_with, pro->index))
794 	      vec.quick_push (pro);
795 	  }
796 
797       FOR_EACH_EDGE (e, ei, bb->succs)
798 	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
799 	    && bitmap_set_bit (bb_with, e->dest->index))
800 	  vec.quick_push (e->dest);
801     }
802 
803   if (dump_file)
804     fprintf (dump_file, "Avoiding non-duplicatable blocks, PRO is now %d\n",
805 	     pro->index);
806 
807   /* If we can move PRO back without having to duplicate more blocks, do so.
808      We do this because putting the prologue earlier is better for scheduling.
809 
810      We can move back to a block PRE if every path from PRE will eventually
811      need a prologue, that is, PRO is a post-dominator of PRE.  PRE needs
812      to dominate every block reachable from itself.  We keep in BB_TMP a
813      bitmap of the blocks reachable from PRE that we already found, and in
814      VEC a stack of those we still need to consider.
815 
816      Any block reachable from PRE is also reachable from all predecessors
817      of PRE, so if we find we need to move PRE back further we can leave
818      everything not considered so far on the stack.  Any block dominated
819      by PRE is also dominated by all other dominators of PRE, so anything
820      found good for some PRE does not need to be reconsidered later.
821 
822      We don't need to update BB_WITH because none of the new blocks found
823      can jump to a block that does not need the prologue.  */
824 
825   if (pro != entry)
826     {
827       calculate_dominance_info (CDI_POST_DOMINATORS);
828 
829       auto_bitmap bb_tmp;
830       bitmap_copy (bb_tmp, bb_with);
831       basic_block last_ok = pro;
832       vec.truncate (0);
833 
834       while (pro != entry)
835 	{
836 	  basic_block pre = get_immediate_dominator (CDI_DOMINATORS, pro);
837 	  if (!dominated_by_p (CDI_POST_DOMINATORS, pre, pro))
838 	    break;
839 
840 	  if (bitmap_set_bit (bb_tmp, pre->index))
841 	    vec.quick_push (pre);
842 
843 	  bool ok = true;
844 	  while (!vec.is_empty ())
845 	    {
846 	      if (!dominated_by_p (CDI_DOMINATORS, vec.last (), pre))
847 		{
848 		  ok = false;
849 		  break;
850 		}
851 
852 	      basic_block bb = vec.pop ();
853 	      FOR_EACH_EDGE (e, ei, bb->succs)
854 		if (bitmap_set_bit (bb_tmp, e->dest->index))
855 		  vec.quick_push (e->dest);
856 	    }
857 
858 	  if (ok && can_get_prologue (pre, prologue_clobbered))
859 	    last_ok = pre;
860 
861 	  pro = pre;
862 	}
863 
864       pro = last_ok;
865 
866       free_dominance_info (CDI_POST_DOMINATORS);
867     }
868 
869   vec.release ();
870 
871   if (dump_file)
872     fprintf (dump_file, "Bumping back to anticipatable blocks, PRO is now %d\n",
873 	     pro->index);
874 
875   if (pro == entry)
876     {
877       free_dominance_info (CDI_DOMINATORS);
878       return;
879     }
880 
881   /* Compute what fraction of the frequency and count of the blocks that run
882      both with and without prologue are for running with prologue.  This gives
883      the correct answer for reducible flow graphs; for irreducible flow graphs
884      our profile is messed up beyond repair anyway.  */
885 
886   profile_count num = profile_count::zero ();
887   profile_count den = profile_count::zero ();
888 
889   FOR_EACH_EDGE (e, ei, pro->preds)
890     if (!dominated_by_p (CDI_DOMINATORS, e->src, pro))
891       {
892 	if (e->count ().initialized_p ())
893 	  num += e->count ();
894 	if (e->src->count.initialized_p ())
895 	  den += e->src->count;
896       }
897 
898   /* All is okay, so do it.  */
899 
900   crtl->shrink_wrapped = true;
901   if (dump_file)
902     fprintf (dump_file, "Performing shrink-wrapping.\n");
903 
904   /* Copy the blocks that can run both with and without prologue.  The
905      originals run with prologue, the copies without.  Store a pointer to
906      the copy in the ->aux field of the original.  */
907 
908   FOR_EACH_BB_FN (bb, cfun)
909     if (bitmap_bit_p (bb_with, bb->index)
910 	&& !dominated_by_p (CDI_DOMINATORS, bb, pro))
911       {
912 	basic_block dup = duplicate_block (bb, 0, 0);
913 
914 	bb->aux = dup;
915 
916 	if (JUMP_P (BB_END (dup)) && !any_condjump_p (BB_END (dup)))
917 	  emit_barrier_after_bb (dup);
918 
919 	if (EDGE_COUNT (dup->succs) == 0)
920 	  emit_barrier_after_bb (dup);
921 
922 	if (dump_file)
923 	  fprintf (dump_file, "Duplicated %d to %d\n", bb->index, dup->index);
924 
925 	if (num == profile_count::zero () || den.nonzero_p ())
926 	  bb->count = bb->count.apply_scale (num, den);
927 	dup->count -= bb->count;
928       }
929 
930   /* Now change the edges to point to the copies, where appropriate.  */
931 
932   FOR_EACH_BB_FN (bb, cfun)
933     if (!dominated_by_p (CDI_DOMINATORS, bb, pro))
934       {
935 	basic_block src = bb;
936 	if (bitmap_bit_p (bb_with, bb->index))
937 	  src = (basic_block) bb->aux;
938 
939 	FOR_EACH_EDGE (e, ei, src->succs)
940 	  {
941 	    if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
942 	      continue;
943 
944 	    if (bitmap_bit_p (bb_with, e->dest->index)
945 		&& !dominated_by_p (CDI_DOMINATORS, e->dest, pro))
946 	      {
947 		if (dump_file)
948 		  fprintf (dump_file, "Redirecting edge %d->%d to %d\n",
949 			   e->src->index, e->dest->index,
950 			   ((basic_block) e->dest->aux)->index);
951 		redirect_edge_and_branch_force (e, (basic_block) e->dest->aux);
952 	      }
953 	    else if (e->flags & EDGE_FALLTHRU
954 		     && bitmap_bit_p (bb_with, bb->index))
955 	      force_nonfallthru (e);
956 	  }
957       }
958 
959   /* Also redirect the function entry edge if necessary.  */
960 
961   FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
962     if (bitmap_bit_p (bb_with, e->dest->index)
963 	&& !dominated_by_p (CDI_DOMINATORS, e->dest, pro))
964       {
965 	basic_block split_bb = split_edge (e);
966 	e = single_succ_edge (split_bb);
967 	redirect_edge_and_branch_force (e, (basic_block) e->dest->aux);
968       }
969 
970   /* Make a simple_return for those exits that run without prologue.  */
971 
972   FOR_EACH_BB_REVERSE_FN (bb, cfun)
973     if (!bitmap_bit_p (bb_with, bb->index))
974       FOR_EACH_EDGE (e, ei, bb->succs)
975 	if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
976 	  handle_simple_exit (e);
977 
978   /* Finally, we want a single edge to put the prologue on.  Make a new
979      block before the PRO block; the edge beteen them is the edge we want.
980      Then redirect those edges into PRO that come from blocks without the
981      prologue, to point to the new block instead.  The new prologue block
982      is put at the end of the insn chain.  */
983 
984   basic_block new_bb = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
985   BB_COPY_PARTITION (new_bb, pro);
986   new_bb->count = profile_count::zero ();
987   if (dump_file)
988     fprintf (dump_file, "Made prologue block %d\n", new_bb->index);
989 
990   for (ei = ei_start (pro->preds); (e = ei_safe_edge (ei)); )
991     {
992       if (bitmap_bit_p (bb_with, e->src->index)
993 	  || dominated_by_p (CDI_DOMINATORS, e->src, pro))
994 	{
995 	  ei_next (&ei);
996 	  continue;
997 	}
998 
999       new_bb->count += e->count ();
1000 
1001       redirect_edge_and_branch_force (e, new_bb);
1002       if (dump_file)
1003 	fprintf (dump_file, "Redirected edge from %d\n", e->src->index);
1004     }
1005 
1006   *entry_edge = make_single_succ_edge (new_bb, pro, EDGE_FALLTHRU);
1007   force_nonfallthru (*entry_edge);
1008 
1009   free_dominance_info (CDI_DOMINATORS);
1010 }
1011 
1012 /* Separate shrink-wrapping
1013 
1014    Instead of putting all of the prologue and epilogue in one spot, we
1015    can put parts of it in places where those components are executed less
1016    frequently.  The following code does this, for prologue and epilogue
1017    components that can be put in more than one location, and where those
1018    components can be executed more than once (the epilogue component will
1019    always be executed before the prologue component is executed a second
1020    time).
1021 
1022    What exactly is a component is target-dependent.  The more usual
1023    components are simple saves/restores to/from the frame of callee-saved
1024    registers.  This code treats components abstractly (as an sbitmap),
1025    letting the target handle all details.
1026 
1027    Prologue components are placed in such a way that for every component
1028    the prologue is executed as infrequently as possible.  We do this by
1029    walking the dominator tree, comparing the cost of placing a prologue
1030    component before a block to the sum of costs determined for all subtrees
1031    of that block.
1032 
1033    From this placement, we then determine for each component all blocks
1034    where at least one of this block's dominators (including itself) will
1035    get a prologue inserted.  That then is how the components are placed.
1036    We could place the epilogue components a bit smarter (we can save a
1037    bit of code size sometimes); this is a possible future improvement.
1038 
1039    Prologues and epilogues are preferably placed into a block, either at
1040    the beginning or end of it, if it is needed for all predecessor resp.
1041    successor edges; or placed on the edge otherwise.
1042 
1043    If the placement of any prologue/epilogue leads to a situation we cannot
1044    handle (for example, an abnormal edge would need to be split, or some
1045    targets want to use some specific registers that may not be available
1046    where we want to put them), separate shrink-wrapping for the components
1047    in that prologue/epilogue is aborted.  */
1048 
1049 
1050 /* Print the sbitmap COMPONENTS to the DUMP_FILE if not empty, with the
1051    label LABEL.  */
1052 static void
dump_components(const char * label,sbitmap components)1053 dump_components (const char *label, sbitmap components)
1054 {
1055   if (bitmap_empty_p (components))
1056     return;
1057 
1058   fprintf (dump_file, " [%s", label);
1059 
1060   for (unsigned int j = 0; j < components->n_bits; j++)
1061     if (bitmap_bit_p (components, j))
1062       fprintf (dump_file, " %u", j);
1063 
1064   fprintf (dump_file, "]");
1065 }
1066 
1067 /* The data we collect for each bb.  */
1068 struct sw {
1069   /* What components does this BB need?  */
1070   sbitmap needs_components;
1071 
1072   /* What components does this BB have?  This is the main decision this
1073      pass makes.  */
1074   sbitmap has_components;
1075 
1076   /* The components for which we placed code at the start of the BB (instead
1077      of on all incoming edges).  */
1078   sbitmap head_components;
1079 
1080   /* The components for which we placed code at the end of the BB (instead
1081      of on all outgoing edges).  */
1082   sbitmap tail_components;
1083 
1084   /* The frequency of executing the prologue for this BB, if a prologue is
1085      placed on this BB.  This is a pessimistic estimate (no prologue is
1086      needed for edges from blocks that have the component under consideration
1087      active already).  */
1088   gcov_type own_cost;
1089 
1090   /* The frequency of executing the prologue for this BB and all BBs
1091      dominated by it.  */
1092   gcov_type total_cost;
1093 };
1094 
1095 /* A helper function for accessing the pass-specific info.  */
1096 static inline struct sw *
SW(basic_block bb)1097 SW (basic_block bb)
1098 {
1099   gcc_assert (bb->aux);
1100   return (struct sw *) bb->aux;
1101 }
1102 
1103 /* Create the pass-specific data structures for separately shrink-wrapping
1104    with components COMPONENTS.  */
1105 static void
init_separate_shrink_wrap(sbitmap components)1106 init_separate_shrink_wrap (sbitmap components)
1107 {
1108   basic_block bb;
1109   FOR_ALL_BB_FN (bb, cfun)
1110     {
1111       bb->aux = xcalloc (1, sizeof (struct sw));
1112 
1113       SW (bb)->needs_components = targetm.shrink_wrap.components_for_bb (bb);
1114 
1115       /* Mark all basic blocks without successor as needing all components.
1116 	 This avoids problems in at least cfgcleanup, sel-sched, and
1117 	 regrename (largely to do with all paths to such a block still
1118 	 needing the same dwarf CFI info).  */
1119       if (EDGE_COUNT (bb->succs) == 0)
1120 	bitmap_copy (SW (bb)->needs_components, components);
1121 
1122       if (dump_file)
1123 	{
1124 	  fprintf (dump_file, "bb %d components:", bb->index);
1125 	  dump_components ("has", SW (bb)->needs_components);
1126 	  fprintf (dump_file, "\n");
1127 	}
1128 
1129       SW (bb)->has_components = sbitmap_alloc (SBITMAP_SIZE (components));
1130       SW (bb)->head_components = sbitmap_alloc (SBITMAP_SIZE (components));
1131       SW (bb)->tail_components = sbitmap_alloc (SBITMAP_SIZE (components));
1132       bitmap_clear (SW (bb)->has_components);
1133     }
1134 }
1135 
1136 /* Destroy the pass-specific data.  */
1137 static void
fini_separate_shrink_wrap(void)1138 fini_separate_shrink_wrap (void)
1139 {
1140   basic_block bb;
1141   FOR_ALL_BB_FN (bb, cfun)
1142     if (bb->aux)
1143       {
1144 	sbitmap_free (SW (bb)->needs_components);
1145 	sbitmap_free (SW (bb)->has_components);
1146 	sbitmap_free (SW (bb)->head_components);
1147 	sbitmap_free (SW (bb)->tail_components);
1148 	free (bb->aux);
1149 	bb->aux = 0;
1150       }
1151 }
1152 
1153 /* Place the prologue for component WHICH, in the basic blocks dominated
1154    by HEAD.  Do a DFS over the dominator tree, and set bit WHICH in the
1155    HAS_COMPONENTS of a block if either the block has that bit set in
1156    NEEDS_COMPONENTS, or it is cheaper to place the prologue here than in all
1157    dominator subtrees separately.  */
1158 static void
place_prologue_for_one_component(unsigned int which,basic_block head)1159 place_prologue_for_one_component (unsigned int which, basic_block head)
1160 {
1161   /* The block we are currently dealing with.  */
1162   basic_block bb = head;
1163   /* Is this the first time we visit this block, i.e. have we just gone
1164      down the tree.  */
1165   bool first_visit = true;
1166 
1167   /* Walk the dominator tree, visit one block per iteration of this loop.
1168      Each basic block is visited twice: once before visiting any children
1169      of the block, and once after visiting all of them (leaf nodes are
1170      visited only once).  As an optimization, we do not visit subtrees
1171      that can no longer influence the prologue placement.  */
1172   for (;;)
1173     {
1174       /* First visit of a block: set the (children) cost accumulator to zero;
1175 	 if the block does not have the component itself, walk down.  */
1176       if (first_visit)
1177 	{
1178 	  /* Initialize the cost.  The cost is the block execution frequency
1179 	     that does not come from backedges.  Calculating this by simply
1180 	     adding the cost of all edges that aren't backedges does not
1181 	     work: this does not always add up to the block frequency at
1182 	     all, and even if it does, rounding error makes for bad
1183 	     decisions.  */
1184 	  SW (bb)->own_cost = bb->count.to_frequency (cfun);
1185 
1186 	  edge e;
1187 	  edge_iterator ei;
1188 	  FOR_EACH_EDGE (e, ei, bb->preds)
1189 	    if (dominated_by_p (CDI_DOMINATORS, e->src, bb))
1190 	      {
1191 		if (SW (bb)->own_cost > EDGE_FREQUENCY (e))
1192 		  SW (bb)->own_cost -= EDGE_FREQUENCY (e);
1193 		else
1194 		  SW (bb)->own_cost = 0;
1195 	      }
1196 
1197 	  SW (bb)->total_cost = 0;
1198 
1199 	  if (!bitmap_bit_p (SW (bb)->needs_components, which)
1200 	      && first_dom_son (CDI_DOMINATORS, bb))
1201 	    {
1202 	      bb = first_dom_son (CDI_DOMINATORS, bb);
1203 	      continue;
1204 	    }
1205 	}
1206 
1207       /* If this block does need the component itself, or it is cheaper to
1208 	 put the prologue here than in all the descendants that need it,
1209 	 mark it so.  If this block's immediate post-dominator is dominated
1210 	 by this block, and that needs the prologue, we can put it on this
1211 	 block as well (earlier is better).  */
1212       if (bitmap_bit_p (SW (bb)->needs_components, which)
1213 	  || SW (bb)->total_cost > SW (bb)->own_cost)
1214 	{
1215 	  SW (bb)->total_cost = SW (bb)->own_cost;
1216 	  bitmap_set_bit (SW (bb)->has_components, which);
1217 	}
1218       else
1219 	{
1220 	  basic_block kid = get_immediate_dominator (CDI_POST_DOMINATORS, bb);
1221 	  if (dominated_by_p (CDI_DOMINATORS, kid, bb)
1222 	      && bitmap_bit_p (SW (kid)->has_components, which))
1223 	    {
1224 	      SW (bb)->total_cost = SW (bb)->own_cost;
1225 	      bitmap_set_bit (SW (bb)->has_components, which);
1226 	    }
1227 	}
1228 
1229       /* We are back where we started, so we are done now.  */
1230       if (bb == head)
1231 	return;
1232 
1233       /* We now know the cost of the subtree rooted at the current block.
1234 	 Accumulate this cost in the parent.  */
1235       basic_block parent = get_immediate_dominator (CDI_DOMINATORS, bb);
1236       SW (parent)->total_cost += SW (bb)->total_cost;
1237 
1238       /* Don't walk the tree down unless necessary.  */
1239       if (next_dom_son (CDI_DOMINATORS, bb)
1240           && SW (parent)->total_cost <= SW (parent)->own_cost)
1241 	{
1242 	  bb = next_dom_son (CDI_DOMINATORS, bb);
1243 	  first_visit = true;
1244 	}
1245       else
1246 	{
1247 	  bb = parent;
1248 	  first_visit = false;
1249 	}
1250     }
1251 }
1252 
1253 /* Set HAS_COMPONENTS in every block to the maximum it can be set to without
1254    setting it on any path from entry to exit where it was not already set
1255    somewhere (or, for blocks that have no path to the exit, consider only
1256    paths from the entry to the block itself).  */
1257 static void
spread_components(sbitmap components)1258 spread_components (sbitmap components)
1259 {
1260   basic_block entry_block = ENTRY_BLOCK_PTR_FOR_FN (cfun);
1261   basic_block exit_block = EXIT_BLOCK_PTR_FOR_FN (cfun);
1262 
1263   /* A stack of all blocks left to consider, and a bitmap of all blocks
1264      on that stack.  */
1265   vec<basic_block> todo;
1266   todo.create (n_basic_blocks_for_fn (cfun));
1267   auto_bitmap seen;
1268 
1269   auto_sbitmap old (SBITMAP_SIZE (components));
1270 
1271   /* Find for every block the components that are *not* needed on some path
1272      from the entry to that block.  Do this with a flood fill from the entry
1273      block.  Every block can be visited at most as often as the number of
1274      components (plus one), and usually much less often.  */
1275 
1276   if (dump_file)
1277     fprintf (dump_file, "Spreading down...\n");
1278 
1279   basic_block bb;
1280   FOR_ALL_BB_FN (bb, cfun)
1281     bitmap_clear (SW (bb)->head_components);
1282 
1283   bitmap_copy (SW (entry_block)->head_components, components);
1284 
1285   edge e;
1286   edge_iterator ei;
1287 
1288   todo.quick_push (single_succ (entry_block));
1289   bitmap_set_bit (seen, single_succ (entry_block)->index);
1290   while (!todo.is_empty ())
1291     {
1292       bb = todo.pop ();
1293 
1294       bitmap_copy (old, SW (bb)->head_components);
1295 
1296       FOR_EACH_EDGE (e, ei, bb->preds)
1297 	bitmap_ior (SW (bb)->head_components, SW (bb)->head_components,
1298 		    SW (e->src)->head_components);
1299 
1300       bitmap_and_compl (SW (bb)->head_components, SW (bb)->head_components,
1301 			SW (bb)->has_components);
1302 
1303       if (!bitmap_equal_p (old, SW (bb)->head_components))
1304 	FOR_EACH_EDGE (e, ei, bb->succs)
1305 	  if (bitmap_set_bit (seen, e->dest->index))
1306 	    todo.quick_push (e->dest);
1307 
1308       bitmap_clear_bit (seen, bb->index);
1309     }
1310 
1311   /* Find for every block the components that are *not* needed on some reverse
1312      path from the exit to that block.  */
1313 
1314   if (dump_file)
1315     fprintf (dump_file, "Spreading up...\n");
1316 
1317   /* First, mark all blocks not reachable from the exit block as not needing
1318      any component on any path to the exit.  Mark everything, and then clear
1319      again by a flood fill.  */
1320 
1321   FOR_ALL_BB_FN (bb, cfun)
1322     bitmap_copy (SW (bb)->tail_components, components);
1323 
1324   FOR_EACH_EDGE (e, ei, exit_block->preds)
1325     {
1326       todo.quick_push (e->src);
1327       bitmap_set_bit (seen, e->src->index);
1328     }
1329 
1330   while (!todo.is_empty ())
1331     {
1332       bb = todo.pop ();
1333 
1334       if (!bitmap_empty_p (SW (bb)->tail_components))
1335 	FOR_EACH_EDGE (e, ei, bb->preds)
1336 	  if (bitmap_set_bit (seen, e->src->index))
1337 	    todo.quick_push (e->src);
1338 
1339       bitmap_clear (SW (bb)->tail_components);
1340 
1341       bitmap_clear_bit (seen, bb->index);
1342     }
1343 
1344   /* And then, flood fill backwards to find for every block the components
1345      not needed on some path to the exit.  */
1346 
1347   bitmap_copy (SW (exit_block)->tail_components, components);
1348 
1349   FOR_EACH_EDGE (e, ei, exit_block->preds)
1350     {
1351       todo.quick_push (e->src);
1352       bitmap_set_bit (seen, e->src->index);
1353     }
1354 
1355   while (!todo.is_empty ())
1356     {
1357       bb = todo.pop ();
1358 
1359       bitmap_copy (old, SW (bb)->tail_components);
1360 
1361       FOR_EACH_EDGE (e, ei, bb->succs)
1362 	bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components,
1363 		    SW (e->dest)->tail_components);
1364 
1365       bitmap_and_compl (SW (bb)->tail_components, SW (bb)->tail_components,
1366 			SW (bb)->has_components);
1367 
1368       if (!bitmap_equal_p (old, SW (bb)->tail_components))
1369 	FOR_EACH_EDGE (e, ei, bb->preds)
1370 	  if (bitmap_set_bit (seen, e->src->index))
1371 	    todo.quick_push (e->src);
1372 
1373       bitmap_clear_bit (seen, bb->index);
1374     }
1375 
1376   todo.release ();
1377 
1378   /* Finally, mark everything not not needed both forwards and backwards.  */
1379 
1380   FOR_EACH_BB_FN (bb, cfun)
1381     {
1382       bitmap_and (SW (bb)->head_components, SW (bb)->head_components,
1383 		  SW (bb)->tail_components);
1384       bitmap_and_compl (SW (bb)->has_components, components,
1385 			SW (bb)->head_components);
1386     }
1387 
1388   FOR_ALL_BB_FN (bb, cfun)
1389     {
1390       if (dump_file)
1391 	{
1392 	  fprintf (dump_file, "bb %d components:", bb->index);
1393 	  dump_components ("has", SW (bb)->has_components);
1394 	  fprintf (dump_file, "\n");
1395 	}
1396     }
1397 }
1398 
1399 /* If we cannot handle placing some component's prologues or epilogues where
1400    we decided we should place them, unmark that component in COMPONENTS so
1401    that it is not wrapped separately.  */
1402 static void
disqualify_problematic_components(sbitmap components)1403 disqualify_problematic_components (sbitmap components)
1404 {
1405   auto_sbitmap pro (SBITMAP_SIZE (components));
1406   auto_sbitmap epi (SBITMAP_SIZE (components));
1407 
1408   basic_block bb;
1409   FOR_EACH_BB_FN (bb, cfun)
1410     {
1411       edge e;
1412       edge_iterator ei;
1413       FOR_EACH_EDGE (e, ei, bb->succs)
1414 	{
1415 	  /* Find which components we want pro/epilogues for here.  */
1416 	  bitmap_and_compl (epi, SW (e->src)->has_components,
1417 			    SW (e->dest)->has_components);
1418 	  bitmap_and_compl (pro, SW (e->dest)->has_components,
1419 			    SW (e->src)->has_components);
1420 
1421 	  /* Ask the target what it thinks about things.  */
1422 	  if (!bitmap_empty_p (epi))
1423 	    targetm.shrink_wrap.disqualify_components (components, e, epi,
1424 						       false);
1425 	  if (!bitmap_empty_p (pro))
1426 	    targetm.shrink_wrap.disqualify_components (components, e, pro,
1427 						       true);
1428 
1429 	  /* If this edge doesn't need splitting, we're fine.  */
1430 	  if (single_pred_p (e->dest)
1431 	      && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
1432 	    continue;
1433 
1434 	  /* If the edge can be split, that is fine too.  */
1435 	  if ((e->flags & EDGE_ABNORMAL) == 0)
1436 	    continue;
1437 
1438 	  /* We also can handle sibcalls.  */
1439 	  if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
1440 	    {
1441 	      gcc_assert (e->flags & EDGE_SIBCALL);
1442 	      continue;
1443 	    }
1444 
1445 	  /* Remove from consideration those components we would need
1446 	     pro/epilogues for on edges where we cannot insert them.  */
1447 	  bitmap_and_compl (components, components, epi);
1448 	  bitmap_and_compl (components, components, pro);
1449 
1450 	  if (dump_file && !bitmap_subset_p (epi, components))
1451 	    {
1452 	      fprintf (dump_file, "  BAD epi %d->%d", e->src->index,
1453 		       e->dest->index);
1454 	      if (e->flags & EDGE_EH)
1455 		fprintf (dump_file, " for EH");
1456 	      dump_components ("epi", epi);
1457 	      fprintf (dump_file, "\n");
1458 	    }
1459 
1460 	  if (dump_file && !bitmap_subset_p (pro, components))
1461 	    {
1462 	      fprintf (dump_file, "  BAD pro %d->%d", e->src->index,
1463 		       e->dest->index);
1464 	      if (e->flags & EDGE_EH)
1465 		fprintf (dump_file, " for EH");
1466 	      dump_components ("pro", pro);
1467 	      fprintf (dump_file, "\n");
1468 	    }
1469 	}
1470     }
1471 }
1472 
1473 /* Place code for prologues and epilogues for COMPONENTS where we can put
1474    that code at the start of basic blocks.  */
1475 static void
emit_common_heads_for_components(sbitmap components)1476 emit_common_heads_for_components (sbitmap components)
1477 {
1478   auto_sbitmap pro (SBITMAP_SIZE (components));
1479   auto_sbitmap epi (SBITMAP_SIZE (components));
1480   auto_sbitmap tmp (SBITMAP_SIZE (components));
1481 
1482   basic_block bb;
1483   FOR_ALL_BB_FN (bb, cfun)
1484     bitmap_clear (SW (bb)->head_components);
1485 
1486   FOR_EACH_BB_FN (bb, cfun)
1487     {
1488       /* Find which prologue resp. epilogue components are needed for all
1489 	 predecessor edges to this block.  */
1490 
1491       /* First, select all possible components.  */
1492       bitmap_copy (epi, components);
1493       bitmap_copy (pro, components);
1494 
1495       edge e;
1496       edge_iterator ei;
1497       FOR_EACH_EDGE (e, ei, bb->preds)
1498 	{
1499 	  if (e->flags & EDGE_ABNORMAL)
1500 	    {
1501 	      bitmap_clear (epi);
1502 	      bitmap_clear (pro);
1503 	      break;
1504 	    }
1505 
1506 	  /* Deselect those epilogue components that should not be inserted
1507 	     for this edge.  */
1508 	  bitmap_and_compl (tmp, SW (e->src)->has_components,
1509 			    SW (e->dest)->has_components);
1510 	  bitmap_and (epi, epi, tmp);
1511 
1512 	  /* Similar, for the prologue.  */
1513 	  bitmap_and_compl (tmp, SW (e->dest)->has_components,
1514 			    SW (e->src)->has_components);
1515 	  bitmap_and (pro, pro, tmp);
1516 	}
1517 
1518       if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
1519 	fprintf (dump_file, "  bb %d", bb->index);
1520 
1521       if (dump_file && !bitmap_empty_p (epi))
1522 	dump_components ("epi", epi);
1523       if (dump_file && !bitmap_empty_p (pro))
1524 	dump_components ("pro", pro);
1525 
1526       if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
1527 	fprintf (dump_file, "\n");
1528 
1529       /* Place code after the BB note.  */
1530       if (!bitmap_empty_p (pro))
1531 	{
1532 	  start_sequence ();
1533 	  targetm.shrink_wrap.emit_prologue_components (pro);
1534 	  rtx_insn *seq = get_insns ();
1535 	  end_sequence ();
1536 	  record_prologue_seq (seq);
1537 
1538 	  emit_insn_after (seq, bb_note (bb));
1539 
1540 	  bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, pro);
1541 	}
1542 
1543       if (!bitmap_empty_p (epi))
1544 	{
1545 	  start_sequence ();
1546 	  targetm.shrink_wrap.emit_epilogue_components (epi);
1547 	  rtx_insn *seq = get_insns ();
1548 	  end_sequence ();
1549 	  record_epilogue_seq (seq);
1550 
1551 	  emit_insn_after (seq, bb_note (bb));
1552 
1553 	  bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, epi);
1554 	}
1555     }
1556 }
1557 
1558 /* Place code for prologues and epilogues for COMPONENTS where we can put
1559    that code at the end of basic blocks.  */
1560 static void
emit_common_tails_for_components(sbitmap components)1561 emit_common_tails_for_components (sbitmap components)
1562 {
1563   auto_sbitmap pro (SBITMAP_SIZE (components));
1564   auto_sbitmap epi (SBITMAP_SIZE (components));
1565   auto_sbitmap tmp (SBITMAP_SIZE (components));
1566 
1567   basic_block bb;
1568   FOR_ALL_BB_FN (bb, cfun)
1569     bitmap_clear (SW (bb)->tail_components);
1570 
1571   FOR_EACH_BB_FN (bb, cfun)
1572     {
1573       /* Find which prologue resp. epilogue components are needed for all
1574 	 successor edges from this block.  */
1575       if (EDGE_COUNT (bb->succs) == 0)
1576 	continue;
1577 
1578       /* First, select all possible components.  */
1579       bitmap_copy (epi, components);
1580       bitmap_copy (pro, components);
1581 
1582       edge e;
1583       edge_iterator ei;
1584       FOR_EACH_EDGE (e, ei, bb->succs)
1585 	{
1586 	  if (e->flags & EDGE_ABNORMAL)
1587 	    {
1588 	      bitmap_clear (epi);
1589 	      bitmap_clear (pro);
1590 	      break;
1591 	    }
1592 
1593 	  /* Deselect those epilogue components that should not be inserted
1594 	     for this edge, and also those that are already put at the head
1595 	     of the successor block.  */
1596 	  bitmap_and_compl (tmp, SW (e->src)->has_components,
1597 			    SW (e->dest)->has_components);
1598 	  bitmap_and_compl (tmp, tmp, SW (e->dest)->head_components);
1599 	  bitmap_and (epi, epi, tmp);
1600 
1601 	  /* Similarly, for the prologue.  */
1602 	  bitmap_and_compl (tmp, SW (e->dest)->has_components,
1603 			    SW (e->src)->has_components);
1604 	  bitmap_and_compl (tmp, tmp, SW (e->dest)->head_components);
1605 	  bitmap_and (pro, pro, tmp);
1606 	}
1607 
1608       /* If the last insn of this block is a control flow insn we cannot
1609 	 put anything after it.  We can put our code before it instead,
1610 	 but only if that jump insn is a simple jump.  */
1611       rtx_insn *last_insn = BB_END (bb);
1612       if (control_flow_insn_p (last_insn) && !simplejump_p (last_insn))
1613 	{
1614 	  bitmap_clear (epi);
1615 	  bitmap_clear (pro);
1616 	}
1617 
1618       if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
1619 	fprintf (dump_file, "  bb %d", bb->index);
1620 
1621       if (dump_file && !bitmap_empty_p (epi))
1622 	dump_components ("epi", epi);
1623       if (dump_file && !bitmap_empty_p (pro))
1624 	dump_components ("pro", pro);
1625 
1626       if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
1627 	fprintf (dump_file, "\n");
1628 
1629       /* Put the code at the end of the BB, but before any final jump.  */
1630       if (!bitmap_empty_p (epi))
1631 	{
1632 	  start_sequence ();
1633 	  targetm.shrink_wrap.emit_epilogue_components (epi);
1634 	  rtx_insn *seq = get_insns ();
1635 	  end_sequence ();
1636 	  record_epilogue_seq (seq);
1637 
1638 	  if (control_flow_insn_p (last_insn))
1639 	    emit_insn_before (seq, last_insn);
1640 	  else
1641 	    emit_insn_after (seq, last_insn);
1642 
1643 	  bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, epi);
1644 	}
1645 
1646       if (!bitmap_empty_p (pro))
1647 	{
1648 	  start_sequence ();
1649 	  targetm.shrink_wrap.emit_prologue_components (pro);
1650 	  rtx_insn *seq = get_insns ();
1651 	  end_sequence ();
1652 	  record_prologue_seq (seq);
1653 
1654 	  if (control_flow_insn_p (last_insn))
1655 	    emit_insn_before (seq, last_insn);
1656 	  else
1657 	    emit_insn_after (seq, last_insn);
1658 
1659 	  bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, pro);
1660 	}
1661     }
1662 }
1663 
1664 /* Place prologues and epilogues for COMPONENTS on edges, if we haven't already
1665    placed them inside blocks directly.  */
1666 static void
insert_prologue_epilogue_for_components(sbitmap components)1667 insert_prologue_epilogue_for_components (sbitmap components)
1668 {
1669   auto_sbitmap pro (SBITMAP_SIZE (components));
1670   auto_sbitmap epi (SBITMAP_SIZE (components));
1671 
1672   basic_block bb;
1673   FOR_EACH_BB_FN (bb, cfun)
1674     {
1675       if (!bb->aux)
1676 	continue;
1677 
1678       edge e;
1679       edge_iterator ei;
1680       FOR_EACH_EDGE (e, ei, bb->succs)
1681 	{
1682 	  /* Find which pro/epilogue components are needed on this edge.  */
1683 	  bitmap_and_compl (epi, SW (e->src)->has_components,
1684 			    SW (e->dest)->has_components);
1685 	  bitmap_and_compl (pro, SW (e->dest)->has_components,
1686 			    SW (e->src)->has_components);
1687 	  bitmap_and (epi, epi, components);
1688 	  bitmap_and (pro, pro, components);
1689 
1690 	  /* Deselect those we already have put at the head or tail of the
1691 	     edge's dest resp. src.  */
1692 	  bitmap_and_compl (epi, epi, SW (e->dest)->head_components);
1693 	  bitmap_and_compl (pro, pro, SW (e->dest)->head_components);
1694 	  bitmap_and_compl (epi, epi, SW (e->src)->tail_components);
1695 	  bitmap_and_compl (pro, pro, SW (e->src)->tail_components);
1696 
1697 	  if (!bitmap_empty_p (epi) || !bitmap_empty_p (pro))
1698 	    {
1699 	      if (dump_file)
1700 		{
1701 		  fprintf (dump_file, "  %d->%d", e->src->index,
1702 			   e->dest->index);
1703 		  dump_components ("epi", epi);
1704 		  dump_components ("pro", pro);
1705 		  if (e->flags & EDGE_SIBCALL)
1706 		    fprintf (dump_file, "  (SIBCALL)");
1707 		  else if (e->flags & EDGE_ABNORMAL)
1708 		    fprintf (dump_file, "  (ABNORMAL)");
1709 		  fprintf (dump_file, "\n");
1710 		}
1711 
1712 	      /* Put the epilogue components in place.  */
1713 	      start_sequence ();
1714 	      targetm.shrink_wrap.emit_epilogue_components (epi);
1715 	      rtx_insn *seq = get_insns ();
1716 	      end_sequence ();
1717 	      record_epilogue_seq (seq);
1718 
1719 	      if (e->flags & EDGE_SIBCALL)
1720 		{
1721 		  gcc_assert (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun));
1722 
1723 		  rtx_insn *insn = BB_END (e->src);
1724 		  gcc_assert (CALL_P (insn) && SIBLING_CALL_P (insn));
1725 		  emit_insn_before (seq, insn);
1726 		}
1727 	      else if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
1728 		{
1729 		  gcc_assert (e->flags & EDGE_FALLTHRU);
1730 		  basic_block new_bb = split_edge (e);
1731 		  emit_insn_after (seq, BB_END (new_bb));
1732 		}
1733 	      else
1734 		insert_insn_on_edge (seq, e);
1735 
1736 	      /* Put the prologue components in place.  */
1737 	      start_sequence ();
1738 	      targetm.shrink_wrap.emit_prologue_components (pro);
1739 	      seq = get_insns ();
1740 	      end_sequence ();
1741 	      record_prologue_seq (seq);
1742 
1743 	      insert_insn_on_edge (seq, e);
1744 	    }
1745 	}
1746     }
1747 
1748   commit_edge_insertions ();
1749 }
1750 
1751 /* The main entry point to this subpass.  FIRST_BB is where the prologue
1752    would be normally put.  */
1753 void
try_shrink_wrapping_separate(basic_block first_bb)1754 try_shrink_wrapping_separate (basic_block first_bb)
1755 {
1756   if (HAVE_cc0)
1757     return;
1758 
1759   if (!(SHRINK_WRAPPING_ENABLED
1760 	&& flag_shrink_wrap_separate
1761 	&& optimize_function_for_speed_p (cfun)
1762 	&& targetm.shrink_wrap.get_separate_components))
1763     return;
1764 
1765   /* We don't handle "strange" functions.  */
1766   if (cfun->calls_alloca
1767       || cfun->calls_setjmp
1768       || cfun->can_throw_non_call_exceptions
1769       || crtl->calls_eh_return
1770       || crtl->has_nonlocal_goto
1771       || crtl->saves_all_registers)
1772     return;
1773 
1774   /* Ask the target what components there are.  If it returns NULL, don't
1775      do anything.  */
1776   sbitmap components = targetm.shrink_wrap.get_separate_components ();
1777   if (!components)
1778     return;
1779 
1780   /* We need LIVE info, not defining anything in the entry block and not
1781      using anything in the exit block.  A block then needs a component if
1782      the register for that component is in the IN or GEN or KILL set for
1783      that block.  */
1784   df_scan->local_flags |= DF_SCAN_EMPTY_ENTRY_EXIT;
1785   df_update_entry_exit_and_calls ();
1786   df_live_add_problem ();
1787   df_live_set_all_dirty ();
1788   df_analyze ();
1789 
1790   calculate_dominance_info (CDI_DOMINATORS);
1791   calculate_dominance_info (CDI_POST_DOMINATORS);
1792 
1793   init_separate_shrink_wrap (components);
1794 
1795   sbitmap_iterator sbi;
1796   unsigned int j;
1797   EXECUTE_IF_SET_IN_BITMAP (components, 0, j, sbi)
1798     place_prologue_for_one_component (j, first_bb);
1799 
1800   spread_components (components);
1801 
1802   disqualify_problematic_components (components);
1803 
1804   /* Don't separately shrink-wrap anything where the "main" prologue will
1805      go; the target code can often optimize things if it is presented with
1806      all components together (say, if it generates store-multiple insns).  */
1807   bitmap_and_compl (components, components, SW (first_bb)->has_components);
1808 
1809   if (bitmap_empty_p (components))
1810     {
1811       if (dump_file)
1812 	fprintf (dump_file, "Not wrapping anything separately.\n");
1813     }
1814   else
1815     {
1816       if (dump_file)
1817 	{
1818 	  fprintf (dump_file, "The components we wrap separately are");
1819 	  dump_components ("sep", components);
1820 	  fprintf (dump_file, "\n");
1821 
1822 	  fprintf (dump_file, "... Inserting common heads...\n");
1823 	}
1824 
1825       emit_common_heads_for_components (components);
1826 
1827       if (dump_file)
1828 	fprintf (dump_file, "... Inserting common tails...\n");
1829 
1830       emit_common_tails_for_components (components);
1831 
1832       if (dump_file)
1833 	fprintf (dump_file, "... Inserting the more difficult ones...\n");
1834 
1835       insert_prologue_epilogue_for_components (components);
1836 
1837       if (dump_file)
1838 	fprintf (dump_file, "... Done.\n");
1839 
1840       targetm.shrink_wrap.set_handled_components (components);
1841 
1842       crtl->shrink_wrapped_separate = true;
1843     }
1844 
1845   fini_separate_shrink_wrap ();
1846 
1847   sbitmap_free (components);
1848   free_dominance_info (CDI_DOMINATORS);
1849   free_dominance_info (CDI_POST_DOMINATORS);
1850 
1851   /* All done.  */
1852   df_scan->local_flags &= ~DF_SCAN_EMPTY_ENTRY_EXIT;
1853   df_update_entry_exit_and_calls ();
1854   df_live_set_all_dirty ();
1855   df_analyze ();
1856 }
1857