xref: /dragonfly/contrib/gcc-8.0/gcc/shrink-wrap.c (revision 38fd1498)
1*38fd1498Szrj /* Shrink-wrapping related optimizations.
2*38fd1498Szrj    Copyright (C) 1987-2018 Free Software Foundation, Inc.
3*38fd1498Szrj 
4*38fd1498Szrj This file is part of GCC.
5*38fd1498Szrj 
6*38fd1498Szrj GCC is free software; you can redistribute it and/or modify it under
7*38fd1498Szrj the terms of the GNU General Public License as published by the Free
8*38fd1498Szrj Software Foundation; either version 3, or (at your option) any later
9*38fd1498Szrj version.
10*38fd1498Szrj 
11*38fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12*38fd1498Szrj WARRANTY; without even the implied warranty of MERCHANTABILITY or
13*38fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14*38fd1498Szrj for more details.
15*38fd1498Szrj 
16*38fd1498Szrj You should have received a copy of the GNU General Public License
17*38fd1498Szrj along with GCC; see the file COPYING3.  If not see
18*38fd1498Szrj <http://www.gnu.org/licenses/>.  */
19*38fd1498Szrj 
20*38fd1498Szrj /* This file handles shrink-wrapping related optimizations.  */
21*38fd1498Szrj 
22*38fd1498Szrj #include "config.h"
23*38fd1498Szrj #include "system.h"
24*38fd1498Szrj #include "coretypes.h"
25*38fd1498Szrj #include "backend.h"
26*38fd1498Szrj #include "target.h"
27*38fd1498Szrj #include "rtl.h"
28*38fd1498Szrj #include "tree.h"
29*38fd1498Szrj #include "cfghooks.h"
30*38fd1498Szrj #include "df.h"
31*38fd1498Szrj #include "memmodel.h"
32*38fd1498Szrj #include "tm_p.h"
33*38fd1498Szrj #include "regs.h"
34*38fd1498Szrj #include "insn-config.h"
35*38fd1498Szrj #include "emit-rtl.h"
36*38fd1498Szrj #include "output.h"
37*38fd1498Szrj #include "tree-pass.h"
38*38fd1498Szrj #include "cfgrtl.h"
39*38fd1498Szrj #include "cfgbuild.h"
40*38fd1498Szrj #include "params.h"
41*38fd1498Szrj #include "bb-reorder.h"
42*38fd1498Szrj #include "shrink-wrap.h"
43*38fd1498Szrj #include "regcprop.h"
44*38fd1498Szrj #include "rtl-iter.h"
45*38fd1498Szrj #include "valtrack.h"
46*38fd1498Szrj 
47*38fd1498Szrj 
48*38fd1498Szrj /* Return true if INSN requires the stack frame to be set up.
49*38fd1498Szrj    PROLOGUE_USED contains the hard registers used in the function
50*38fd1498Szrj    prologue.  SET_UP_BY_PROLOGUE is the set of registers we expect the
51*38fd1498Szrj    prologue to set up for the function.  */
52*38fd1498Szrj bool
requires_stack_frame_p(rtx_insn * insn,HARD_REG_SET prologue_used,HARD_REG_SET set_up_by_prologue)53*38fd1498Szrj requires_stack_frame_p (rtx_insn *insn, HARD_REG_SET prologue_used,
54*38fd1498Szrj 			HARD_REG_SET set_up_by_prologue)
55*38fd1498Szrj {
56*38fd1498Szrj   df_ref def, use;
57*38fd1498Szrj   HARD_REG_SET hardregs;
58*38fd1498Szrj   unsigned regno;
59*38fd1498Szrj 
60*38fd1498Szrj   if (CALL_P (insn))
61*38fd1498Szrj     return !SIBLING_CALL_P (insn);
62*38fd1498Szrj 
63*38fd1498Szrj   /* We need a frame to get the unique CFA expected by the unwinder.  */
64*38fd1498Szrj   if (cfun->can_throw_non_call_exceptions && can_throw_internal (insn))
65*38fd1498Szrj     return true;
66*38fd1498Szrj 
67*38fd1498Szrj   CLEAR_HARD_REG_SET (hardregs);
68*38fd1498Szrj   FOR_EACH_INSN_DEF (def, insn)
69*38fd1498Szrj     {
70*38fd1498Szrj       rtx dreg = DF_REF_REG (def);
71*38fd1498Szrj 
72*38fd1498Szrj       if (!REG_P (dreg))
73*38fd1498Szrj 	continue;
74*38fd1498Szrj 
75*38fd1498Szrj       add_to_hard_reg_set (&hardregs, GET_MODE (dreg), REGNO (dreg));
76*38fd1498Szrj     }
77*38fd1498Szrj   if (hard_reg_set_intersect_p (hardregs, prologue_used))
78*38fd1498Szrj     return true;
79*38fd1498Szrj   AND_COMPL_HARD_REG_SET (hardregs, call_used_reg_set);
80*38fd1498Szrj   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
81*38fd1498Szrj     if (TEST_HARD_REG_BIT (hardregs, regno)
82*38fd1498Szrj 	&& df_regs_ever_live_p (regno))
83*38fd1498Szrj       return true;
84*38fd1498Szrj 
85*38fd1498Szrj   FOR_EACH_INSN_USE (use, insn)
86*38fd1498Szrj     {
87*38fd1498Szrj       rtx reg = DF_REF_REG (use);
88*38fd1498Szrj 
89*38fd1498Szrj       if (!REG_P (reg))
90*38fd1498Szrj 	continue;
91*38fd1498Szrj 
92*38fd1498Szrj       add_to_hard_reg_set (&hardregs, GET_MODE (reg),
93*38fd1498Szrj 			   REGNO (reg));
94*38fd1498Szrj     }
95*38fd1498Szrj   if (hard_reg_set_intersect_p (hardregs, set_up_by_prologue))
96*38fd1498Szrj     return true;
97*38fd1498Szrj 
98*38fd1498Szrj   return false;
99*38fd1498Szrj }
100*38fd1498Szrj 
101*38fd1498Szrj /* See whether there has a single live edge from BB, which dest uses
102*38fd1498Szrj    [REGNO, END_REGNO).  Return the live edge if its dest bb has
103*38fd1498Szrj    one or two predecessors.  Otherwise return NULL.  */
104*38fd1498Szrj 
105*38fd1498Szrj static edge
live_edge_for_reg(basic_block bb,int regno,int end_regno)106*38fd1498Szrj live_edge_for_reg (basic_block bb, int regno, int end_regno)
107*38fd1498Szrj {
108*38fd1498Szrj   edge e, live_edge;
109*38fd1498Szrj   edge_iterator ei;
110*38fd1498Szrj   bitmap live;
111*38fd1498Szrj   int i;
112*38fd1498Szrj 
113*38fd1498Szrj   live_edge = NULL;
114*38fd1498Szrj   FOR_EACH_EDGE (e, ei, bb->succs)
115*38fd1498Szrj     {
116*38fd1498Szrj       live = df_get_live_in (e->dest);
117*38fd1498Szrj       for (i = regno; i < end_regno; i++)
118*38fd1498Szrj 	if (REGNO_REG_SET_P (live, i))
119*38fd1498Szrj 	  {
120*38fd1498Szrj 	    if (live_edge && live_edge != e)
121*38fd1498Szrj 	      return NULL;
122*38fd1498Szrj 	    live_edge = e;
123*38fd1498Szrj 	  }
124*38fd1498Szrj     }
125*38fd1498Szrj 
126*38fd1498Szrj   /* We can sometimes encounter dead code.  Don't try to move it
127*38fd1498Szrj      into the exit block.  */
128*38fd1498Szrj   if (!live_edge || live_edge->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
129*38fd1498Szrj     return NULL;
130*38fd1498Szrj 
131*38fd1498Szrj   /* Reject targets of abnormal edges.  This is needed for correctness
132*38fd1498Szrj      on ports like Alpha and MIPS, whose pic_offset_table_rtx can die on
133*38fd1498Szrj      exception edges even though it is generally treated as call-saved
134*38fd1498Szrj      for the majority of the compilation.  Moving across abnormal edges
135*38fd1498Szrj      isn't going to be interesting for shrink-wrap usage anyway.  */
136*38fd1498Szrj   if (live_edge->flags & EDGE_ABNORMAL)
137*38fd1498Szrj     return NULL;
138*38fd1498Szrj 
139*38fd1498Szrj   /* When live_edge->dest->preds == 2, we can create a new block on
140*38fd1498Szrj      the edge to make it meet the requirement.  */
141*38fd1498Szrj   if (EDGE_COUNT (live_edge->dest->preds) > 2)
142*38fd1498Szrj     return NULL;
143*38fd1498Szrj 
144*38fd1498Szrj   return live_edge;
145*38fd1498Szrj }
146*38fd1498Szrj 
/* Try to move INSN from BB to a successor.  Return true on success.
   USES and DEFS are the set of registers that are used and defined
   after INSN in BB.  SPLIT_P indicates whether a live edge from BB
   has already been split (at most one split per function is allowed).
   DEBUG collects dead uses in debug insns so they can be patched up
   after the move.  */

static bool
move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn,
			   const HARD_REG_SET uses,
			   const HARD_REG_SET defs,
			   bool *split_p,
			   struct dead_debug_local *debug)
{
  rtx set, src, dest;
  bitmap live_out, live_in, bb_uses = NULL, bb_defs = NULL;
  unsigned int i, dregno, end_dregno;
  unsigned int sregno = FIRST_PSEUDO_REGISTER;
  unsigned int end_sregno = FIRST_PSEUDO_REGISTER;
  basic_block next_block;
  edge live_edge;
  rtx_insn *dinsn;
  df_ref def;

  /* Look for a simple register assignment.  We don't use single_set here
     because we can't deal with any CLOBBERs, USEs, or REG_UNUSED secondary
     destinations.  */
  if (!INSN_P (insn))
    return false;
  set = PATTERN (insn);
  if (GET_CODE (set) != SET)
    return false;
  src = SET_SRC (set);
  dest = SET_DEST (set);

  /* For the destination, we want only a register.  Also disallow STACK
     or FRAME related adjustments.  They are likely part of the prologue,
     so keep them in the entry block.  */
  if (!REG_P (dest)
      || dest == stack_pointer_rtx
      || dest == frame_pointer_rtx
      || dest == hard_frame_pointer_rtx)
    return false;

  /* For the source, we want one of:
      (1) A (non-overlapping) register
      (2) A constant,
      (3) An expression involving no more than one register.

     That last point comes from the code following, which was originally
     written to handle only register move operations, and still only handles
     a single source register when checking for overlaps.  Happily, the
     same checks can be applied to expressions like (plus reg const).  */

  if (CONSTANT_P (src))
    ;
  else if (!REG_P (src))
    {
      rtx src_inner = NULL_RTX;

      /* Moving a trapping insn would change where the exception is
	 raised, so don't touch it.  */
      if (can_throw_internal (insn))
	return false;

      /* Walk all sub-rtxes of SRC; accept only expression-like codes
	 and at most one inner REG (remembered in SRC_INNER).  */
      subrtx_var_iterator::array_type array;
      FOR_EACH_SUBRTX_VAR (iter, array, src, ALL)
	{
	  rtx x = *iter;
	  switch (GET_RTX_CLASS (GET_CODE (x)))
	    {
	    case RTX_CONST_OBJ:
	    case RTX_COMPARE:
	    case RTX_COMM_COMPARE:
	    case RTX_BIN_ARITH:
	    case RTX_COMM_ARITH:
	    case RTX_UNARY:
	    case RTX_TERNARY:
	      /* Constant or expression.  Continue.  */
	      break;

	    case RTX_OBJ:
	    case RTX_EXTRA:
	      switch (GET_CODE (x))
		{
		case UNSPEC:
		case SUBREG:
		case STRICT_LOW_PART:
		case PC:
		case LO_SUM:
		  /* Ok.  Continue.  */
		  break;

		case REG:
		  /* Fail if we see a second inner register.  */
		  if (src_inner != NULL)
		    return false;
		  src_inner = x;
		  break;

		default:
		  return false;
		}
	      break;

	    default:
	      return false;
	    }
	}

      /* From here on, treat the single inner register as the source
	 for the overlap/liveness checks below.  */
      if (src_inner != NULL)
	src = src_inner;
    }

  /* Make sure that the source register isn't defined later in BB.  */
  if (REG_P (src))
    {
      sregno = REGNO (src);
      end_sregno = END_REGNO (src);
      if (overlaps_hard_reg_set_p (defs, GET_MODE (src), sregno))
	return false;
    }

  /* Make sure that the destination register isn't referenced later in BB.  */
  dregno = REGNO (dest);
  end_dregno = END_REGNO (dest);
  if (overlaps_hard_reg_set_p (uses, GET_MODE (dest), dregno)
      || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
    return false;

  /* See whether there is a successor block to which we could move INSN.  */
  live_edge = live_edge_for_reg (bb, dregno, end_dregno);
  if (!live_edge)
    return false;

  next_block = live_edge->dest;
  /* Create a new basic block on the edge.  */
  if (EDGE_COUNT (next_block->preds) == 2)
    {
      /* split_edge for a block with only one successor is meaningless.  */
      if (EDGE_COUNT (bb->succs) == 1)
	return false;

      /* If DF_LIVE doesn't exist, i.e. at -O1, just give up.  */
      if (!df_live)
	return false;

      basic_block old_dest = live_edge->dest;
      next_block = split_edge (live_edge);

      /* We create a new basic block.  Call df_grow_bb_info to make sure
	 all data structures are allocated.  */
      df_grow_bb_info (df_live);

      /* Seed the new block's live-in as the registers live out of BB
	 that are also live into the old destination.  */
      bitmap_and (df_get_live_in (next_block), df_get_live_out (bb),
		  df_get_live_in (old_dest));
      df_set_bb_dirty (next_block);

      /* We should not split more than once for a function.  */
      if (*split_p)
	return false;

      *split_p = true;
    }

  /* At this point we are committed to moving INSN, but let's try to
     move it as far as we can.  */
  do
    {
      /* Record uses of DEST in debug bind insns below INSN in BB, so
	 that dead_debug_insert_temp can preserve their values later.
	 Note the dangling-else: the `dinsn == insn' break applies only
	 to non-debug insns, so the reverse walk stops at INSN itself.  */
      if (MAY_HAVE_DEBUG_BIND_INSNS)
	{
	  FOR_BB_INSNS_REVERSE (bb, dinsn)
	    if (DEBUG_BIND_INSN_P (dinsn))
	      {
		df_ref use;
		FOR_EACH_INSN_USE (use, dinsn)
		  if (refers_to_regno_p (dregno, end_dregno,
					 DF_REF_REG (use), (rtx *) NULL))
		    dead_debug_add (debug, use, DF_REF_REGNO (use));
	      }
	    else if (dinsn == insn)
	      break;
	}
      live_out = df_get_live_out (bb);
      live_in = df_get_live_in (next_block);
      bb = next_block;

      /* Check whether BB uses DEST or clobbers DEST.  We need to add
	 INSN to BB if so.  Either way, DEST is no longer live on entry,
	 except for any part that overlaps SRC (next loop).  */
      if (!*split_p)
	{
	  bb_uses = &DF_LR_BB_INFO (bb)->use;
	  bb_defs = &DF_LR_BB_INFO (bb)->def;
	}
      if (df_live)
	{
	  for (i = dregno; i < end_dregno; i++)
	    {
	      /* After a split, the fresh block is always the final
		 destination (NEXT_BLOCK is cleared to stop the walk).  */
	      if (*split_p
		  || REGNO_REG_SET_P (bb_uses, i)
		  || REGNO_REG_SET_P (bb_defs, i)
		  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
		next_block = NULL;
	      CLEAR_REGNO_REG_SET (live_out, i);
	      CLEAR_REGNO_REG_SET (live_in, i);
	    }

	  /* Check whether BB clobbers SRC.  We need to add INSN to BB if so.
	     Either way, SRC is now live on entry.  */
	  for (i = sregno; i < end_sregno; i++)
	    {
	      if (*split_p
		  || REGNO_REG_SET_P (bb_defs, i)
		  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
		next_block = NULL;
	      SET_REGNO_REG_SET (live_out, i);
	      SET_REGNO_REG_SET (live_in, i);
	    }
	}
      else
	{
	  /* DF_LR_BB_INFO (bb)->def does not comprise the DF_REF_PARTIAL and
	     DF_REF_CONDITIONAL defs.  So if DF_LIVE doesn't exist, i.e.
	     at -O1, just give up searching NEXT_BLOCK.  */
	  next_block = NULL;
	  for (i = dregno; i < end_dregno; i++)
	    {
	      CLEAR_REGNO_REG_SET (live_out, i);
	      CLEAR_REGNO_REG_SET (live_in, i);
	    }

	  for (i = sregno; i < end_sregno; i++)
	    {
	      SET_REGNO_REG_SET (live_out, i);
	      SET_REGNO_REG_SET (live_in, i);
	    }
	}

      /* If we don't need to add the move to BB, look for a single
	 successor block.  */
      if (next_block)
	{
	  live_edge = live_edge_for_reg (next_block, dregno, end_dregno);
	  if (!live_edge || EDGE_COUNT (live_edge->dest->preds) > 1)
	    break;
	  next_block = live_edge->dest;
	}
    }
  while (next_block);

  /* For the new created basic block, there is no dataflow info at all.
     So skip the following dataflow update and check.  */
  if (!(*split_p))
    {
      /* BB now defines DEST.  It only uses the parts of DEST that overlap SRC
	 (next loop).  */
      for (i = dregno; i < end_dregno; i++)
	{
	  CLEAR_REGNO_REG_SET (bb_uses, i);
	  SET_REGNO_REG_SET (bb_defs, i);
	}

      /* BB now uses SRC.  */
      for (i = sregno; i < end_sregno; i++)
	SET_REGNO_REG_SET (bb_uses, i);
    }

  /* Insert debug temps for dead REGs used in subsequent debug insns.  */
  if (debug->used && !bitmap_empty_p (debug->used))
    FOR_EACH_INSN_DEF (def, insn)
      dead_debug_insert_temp (debug, DF_REF_REGNO (def), insn,
			      DEBUG_TEMP_BEFORE_WITH_VALUE);

  /* Re-emit INSN's pattern at the start of the destination block (right
     after its basic-block note) and delete the original.  */
  emit_insn_after (PATTERN (insn), bb_note (bb));
  delete_insn (insn);
  return true;
}
421*38fd1498Szrj 
422*38fd1498Szrj /* Look for register copies in the first block of the function, and move
423*38fd1498Szrj    them down into successor blocks if the register is used only on one
424*38fd1498Szrj    path.  This exposes more opportunities for shrink-wrapping.  These
425*38fd1498Szrj    kinds of sets often occur when incoming argument registers are moved
426*38fd1498Szrj    to call-saved registers because their values are live across one or
427*38fd1498Szrj    more calls during the function.  */
428*38fd1498Szrj 
429*38fd1498Szrj static void
prepare_shrink_wrap(basic_block entry_block)430*38fd1498Szrj prepare_shrink_wrap (basic_block entry_block)
431*38fd1498Szrj {
432*38fd1498Szrj   rtx_insn *insn, *curr;
433*38fd1498Szrj   rtx x;
434*38fd1498Szrj   HARD_REG_SET uses, defs;
435*38fd1498Szrj   df_ref def, use;
436*38fd1498Szrj   bool split_p = false;
437*38fd1498Szrj   unsigned int i;
438*38fd1498Szrj   struct dead_debug_local debug;
439*38fd1498Szrj 
440*38fd1498Szrj   if (JUMP_P (BB_END (entry_block)))
441*38fd1498Szrj     {
442*38fd1498Szrj       /* To have more shrink-wrapping opportunities, prepare_shrink_wrap tries
443*38fd1498Szrj 	 to sink the copies from parameter to callee saved register out of
444*38fd1498Szrj 	 entry block.  copyprop_hardreg_forward_bb_without_debug_insn is called
445*38fd1498Szrj 	 to release some dependences.  */
446*38fd1498Szrj       copyprop_hardreg_forward_bb_without_debug_insn (entry_block);
447*38fd1498Szrj     }
448*38fd1498Szrj 
449*38fd1498Szrj   dead_debug_local_init (&debug, NULL, NULL);
450*38fd1498Szrj   CLEAR_HARD_REG_SET (uses);
451*38fd1498Szrj   CLEAR_HARD_REG_SET (defs);
452*38fd1498Szrj 
453*38fd1498Szrj   FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr)
454*38fd1498Szrj     if (NONDEBUG_INSN_P (insn)
455*38fd1498Szrj 	&& !move_insn_for_shrink_wrap (entry_block, insn, uses, defs,
456*38fd1498Szrj 				       &split_p, &debug))
457*38fd1498Szrj       {
458*38fd1498Szrj 	/* Add all defined registers to DEFs.  */
459*38fd1498Szrj 	FOR_EACH_INSN_DEF (def, insn)
460*38fd1498Szrj 	  {
461*38fd1498Szrj 	    x = DF_REF_REG (def);
462*38fd1498Szrj 	    if (REG_P (x) && HARD_REGISTER_P (x))
463*38fd1498Szrj 	      for (i = REGNO (x); i < END_REGNO (x); i++)
464*38fd1498Szrj 		SET_HARD_REG_BIT (defs, i);
465*38fd1498Szrj 	  }
466*38fd1498Szrj 
467*38fd1498Szrj 	/* Add all used registers to USESs.  */
468*38fd1498Szrj 	FOR_EACH_INSN_USE (use, insn)
469*38fd1498Szrj 	  {
470*38fd1498Szrj 	    x = DF_REF_REG (use);
471*38fd1498Szrj 	    if (REG_P (x) && HARD_REGISTER_P (x))
472*38fd1498Szrj 	      for (i = REGNO (x); i < END_REGNO (x); i++)
473*38fd1498Szrj 		SET_HARD_REG_BIT (uses, i);
474*38fd1498Szrj 	  }
475*38fd1498Szrj       }
476*38fd1498Szrj 
477*38fd1498Szrj   dead_debug_local_finish (&debug, NULL);
478*38fd1498Szrj }
479*38fd1498Szrj 
480*38fd1498Szrj /* Return whether basic block PRO can get the prologue.  It can not if it
481*38fd1498Szrj    has incoming complex edges that need a prologue inserted (we make a new
482*38fd1498Szrj    block for the prologue, so those edges would need to be redirected, which
483*38fd1498Szrj    does not work).  It also can not if there exist registers live on entry
484*38fd1498Szrj    to PRO that are clobbered by the prologue.  */
485*38fd1498Szrj 
486*38fd1498Szrj static bool
can_get_prologue(basic_block pro,HARD_REG_SET prologue_clobbered)487*38fd1498Szrj can_get_prologue (basic_block pro, HARD_REG_SET prologue_clobbered)
488*38fd1498Szrj {
489*38fd1498Szrj   edge e;
490*38fd1498Szrj   edge_iterator ei;
491*38fd1498Szrj   FOR_EACH_EDGE (e, ei, pro->preds)
492*38fd1498Szrj     if (e->flags & (EDGE_COMPLEX | EDGE_CROSSING)
493*38fd1498Szrj 	&& !dominated_by_p (CDI_DOMINATORS, e->src, pro))
494*38fd1498Szrj       return false;
495*38fd1498Szrj 
496*38fd1498Szrj   HARD_REG_SET live;
497*38fd1498Szrj   REG_SET_TO_HARD_REG_SET (live, df_get_live_in (pro));
498*38fd1498Szrj   if (hard_reg_set_intersect_p (live, prologue_clobbered))
499*38fd1498Szrj     return false;
500*38fd1498Szrj 
501*38fd1498Szrj   return true;
502*38fd1498Szrj }
503*38fd1498Szrj 
504*38fd1498Szrj /* Return whether we can duplicate basic block BB for shrink wrapping.  We
505*38fd1498Szrj    cannot if the block cannot be duplicated at all, or if any of its incoming
506*38fd1498Szrj    edges are complex and come from a block that does not require a prologue
507*38fd1498Szrj    (we cannot redirect such edges), or if the block is too big to copy.
508*38fd1498Szrj    PRO is the basic block before which we would put the prologue, MAX_SIZE is
509*38fd1498Szrj    the maximum size block we allow to be copied.  */
510*38fd1498Szrj 
511*38fd1498Szrj static bool
can_dup_for_shrink_wrapping(basic_block bb,basic_block pro,unsigned max_size)512*38fd1498Szrj can_dup_for_shrink_wrapping (basic_block bb, basic_block pro, unsigned max_size)
513*38fd1498Szrj {
514*38fd1498Szrj   if (!can_duplicate_block_p (bb))
515*38fd1498Szrj     return false;
516*38fd1498Szrj 
517*38fd1498Szrj   edge e;
518*38fd1498Szrj   edge_iterator ei;
519*38fd1498Szrj   FOR_EACH_EDGE (e, ei, bb->preds)
520*38fd1498Szrj     if (e->flags & (EDGE_COMPLEX | EDGE_CROSSING)
521*38fd1498Szrj 	&& !dominated_by_p (CDI_DOMINATORS, e->src, pro))
522*38fd1498Szrj       return false;
523*38fd1498Szrj 
524*38fd1498Szrj   unsigned size = 0;
525*38fd1498Szrj 
526*38fd1498Szrj   rtx_insn *insn;
527*38fd1498Szrj   FOR_BB_INSNS (bb, insn)
528*38fd1498Szrj     if (NONDEBUG_INSN_P (insn))
529*38fd1498Szrj       {
530*38fd1498Szrj 	size += get_attr_min_length (insn);
531*38fd1498Szrj 	if (size > max_size)
532*38fd1498Szrj 	  return false;
533*38fd1498Szrj       }
534*38fd1498Szrj 
535*38fd1498Szrj   return true;
536*38fd1498Szrj }
537*38fd1498Szrj 
/* Do whatever needs to be done for exits that run without prologue.
   Sibcalls need nothing done.  Normal exits get a simple_return inserted.
   E is the exit edge being processed.  */

static void
handle_simple_exit (edge e)
{

  if (e->flags & EDGE_SIBCALL)
    {
      /* Tell function.c to take no further action on this edge.  */
      e->flags |= EDGE_IGNORE;

      /* A sibcall does not fall through; terminate the block with a
	 barrier instead.  */
      e->flags &= ~EDGE_FALLTHRU;
      emit_barrier_after_bb (e->src);
      return;
    }

  /* If the basic block the edge comes from has multiple successors,
     split the edge.  */
  if (EDGE_COUNT (e->src->succs) > 1)
    {
      basic_block old_bb = e->src;
      rtx_insn *end = BB_END (old_bb);
      /* A deleted note serves as the (only) contents of the new block.  */
      rtx_note *note = emit_note_after (NOTE_INSN_DELETED, end);
      basic_block new_bb = create_basic_block (note, note, old_bb);
      BB_COPY_PARTITION (new_bb, old_bb);
      /* Restore OLD_BB's end, which the note emission extended past END
	 (NOTE(review): presumed — confirm against emit_note_after).  */
      BB_END (old_bb) = end;

      /* Route E through the new block; the simple_return below will then
	 be emitted into NEW_BB rather than into the shared OLD_BB.  */
      redirect_edge_succ (e, new_bb);
      new_bb->count = e->count ();
      e->flags |= EDGE_FALLTHRU;

      e = make_single_succ_edge (new_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
    }

  /* Replace the fallthrough to exit with an explicit simple_return jump,
     followed by a barrier.  */
  e->flags &= ~EDGE_FALLTHRU;
  rtx_jump_insn *ret = emit_jump_insn_after (targetm.gen_simple_return (),
					     BB_END (e->src));
  JUMP_LABEL (ret) = simple_return_rtx;
  emit_barrier_after_bb (e->src);

  if (dump_file)
    fprintf (dump_file, "Made simple_return with UID %d in bb %d\n",
	     INSN_UID (ret), e->src->index);
}
583*38fd1498Szrj 
584*38fd1498Szrj /* Try to perform a kind of shrink-wrapping, making sure the
585*38fd1498Szrj    prologue/epilogue is emitted only around those parts of the
586*38fd1498Szrj    function that require it.
587*38fd1498Szrj 
588*38fd1498Szrj    There will be exactly one prologue, and it will be executed either
589*38fd1498Szrj    zero or one time, on any path.  Depending on where the prologue is
590*38fd1498Szrj    placed, some of the basic blocks can be reached via both paths with
591*38fd1498Szrj    and without a prologue.  Such blocks will be duplicated here, and the
592*38fd1498Szrj    edges changed to match.
593*38fd1498Szrj 
594*38fd1498Szrj    Paths that go to the exit without going through the prologue will use
595*38fd1498Szrj    a simple_return instead of the epilogue.  We maximize the number of
596*38fd1498Szrj    those, making sure to only duplicate blocks that can be duplicated.
597*38fd1498Szrj    If the prologue can then still be placed in multiple locations, we
598*38fd1498Szrj    place it as early as possible.
599*38fd1498Szrj 
600*38fd1498Szrj    An example, where we duplicate blocks with control flow (legend:
601*38fd1498Szrj    _B_egin, _R_eturn and _S_imple_return; edges without arrowhead should
602*38fd1498Szrj    be taken to point down or to the right, to simplify the diagram; here,
603*38fd1498Szrj    block 3 needs a prologue, the rest does not):
604*38fd1498Szrj 
605*38fd1498Szrj 
606*38fd1498Szrj        B                 B
607*38fd1498Szrj        |                 |
608*38fd1498Szrj        2                 2
609*38fd1498Szrj        |\                |\
610*38fd1498Szrj        | 3    becomes    | 3
611*38fd1498Szrj        |/                |  \
612*38fd1498Szrj        4                 7   4
613*38fd1498Szrj        |\                |\  |\
614*38fd1498Szrj        | 5               | 8 | 5
615*38fd1498Szrj        |/                |/  |/
616*38fd1498Szrj        6                 9   6
617*38fd1498Szrj        |                 |   |
618*38fd1498Szrj        R                 S   R
619*38fd1498Szrj 
620*38fd1498Szrj 
621*38fd1498Szrj    (bb 4 is duplicated to 7, and so on; the prologue is inserted on the
622*38fd1498Szrj    edge 2->3).
623*38fd1498Szrj 
624*38fd1498Szrj    Another example, where part of a loop is duplicated (again, bb 3 is
625*38fd1498Szrj    the only block that needs a prologue):
626*38fd1498Szrj 
627*38fd1498Szrj 
628*38fd1498Szrj        B   3<--              B       ->3<--
629*38fd1498Szrj        |   |   |             |      |  |   |
630*38fd1498Szrj        |   v   |   becomes   |      |  v   |
631*38fd1498Szrj        2---4---              2---5--   4---
632*38fd1498Szrj            |                     |     |
633*38fd1498Szrj            R                     S     R
634*38fd1498Szrj 
635*38fd1498Szrj 
636*38fd1498Szrj    (bb 4 is duplicated to 5; the prologue is inserted on the edge 5->3).
637*38fd1498Szrj 
638*38fd1498Szrj    ENTRY_EDGE is the edge where the prologue will be placed, possibly
639*38fd1498Szrj    changed by this function.  PROLOGUE_SEQ is the prologue we will insert.  */
640*38fd1498Szrj 
void
try_shrink_wrapping (edge *entry_edge, rtx_insn *prologue_seq)
{
  /* If we cannot shrink-wrap, are told not to shrink-wrap, or it makes
     no sense to shrink-wrap: then do not shrink-wrap!  */

  if (!SHRINK_WRAPPING_ENABLED)
    return;

  if (crtl->profile && !targetm.profile_before_prologue ())
    return;

  if (crtl->calls_eh_return)
    return;

  /* A prologue that contains nothing but the PROLOGUE_END note gains
     nothing from being moved, so bail out early in that case.  */
  bool empty_prologue = true;
  for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn))
    if (!(NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_PROLOGUE_END))
      {
	empty_prologue = false;
	break;
      }
  if (empty_prologue)
    return;

  /* Move some code down to expose more shrink-wrapping opportunities.  */

  basic_block entry = (*entry_edge)->dest;
  prepare_shrink_wrap (entry);

  if (dump_file)
    fprintf (dump_file, "Attempting shrink-wrapping optimization.\n");

  /* Compute the registers set and used in the prologue.  A register use
     that only happens after the prologue itself set that register does
     not count (hence the AND_COMPL against the clobbers seen so far).  */

  HARD_REG_SET prologue_clobbered, prologue_used;
  CLEAR_HARD_REG_SET (prologue_clobbered);
  CLEAR_HARD_REG_SET (prologue_used);
  for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn))
    if (NONDEBUG_INSN_P (insn))
      {
	HARD_REG_SET this_used;
	CLEAR_HARD_REG_SET (this_used);
	note_uses (&PATTERN (insn), record_hard_reg_uses, &this_used);
	AND_COMPL_HARD_REG_SET (this_used, prologue_clobbered);
	IOR_HARD_REG_SET (prologue_used, this_used);
	note_stores (PATTERN (insn), record_hard_reg_sets, &prologue_clobbered);
      }
  CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM);
  if (frame_pointer_needed)
    CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM);

  /* Find out what registers are set up by the prologue; any use of these
     cannot happen before the prologue.  */

  struct hard_reg_set_container set_up_by_prologue;
  CLEAR_HARD_REG_SET (set_up_by_prologue.set);
  add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, STACK_POINTER_REGNUM);
  add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, ARG_POINTER_REGNUM);
  if (frame_pointer_needed)
    add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
			 HARD_FRAME_POINTER_REGNUM);
  if (pic_offset_table_rtx
      && (unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
			 PIC_OFFSET_TABLE_REGNUM);
  if (crtl->drap_reg)
    add_to_hard_reg_set (&set_up_by_prologue.set,
			 GET_MODE (crtl->drap_reg),
			 REGNO (crtl->drap_reg));
  if (targetm.set_up_by_prologue)
    targetm.set_up_by_prologue (&set_up_by_prologue);

  /* We will insert the prologue before the basic block PRO.  PRO should
     dominate all basic blocks that need the prologue to be executed
     before them.  First, make PRO the "tightest wrap" possible.  */

  calculate_dominance_info (CDI_DOMINATORS);

  basic_block pro = 0;

  basic_block bb;
  edge e;
  edge_iterator ei;
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;
      FOR_BB_INSNS (bb, insn)
	if (NONDEBUG_INSN_P (insn)
	    && requires_stack_frame_p (insn, prologue_used,
				       set_up_by_prologue.set))
	  {
	    if (dump_file)
	      fprintf (dump_file, "Block %d needs the prologue.\n", bb->index);
	    pro = nearest_common_dominator (CDI_DOMINATORS, pro, bb);
	    break;
	  }
    }

  /* If nothing needs a prologue, just put it at the start.  This really
     shouldn't happen, but we cannot fix it here.  */

  if (pro == 0)
    {
      if (dump_file)
	fprintf(dump_file, "Nothing needs a prologue, but it isn't empty; "
			   "putting it at the start.\n");
      pro = entry;
    }

  if (dump_file)
    fprintf (dump_file, "After wrapping required blocks, PRO is now %d\n",
	     pro->index);

  /* Now see if we can put the prologue at the start of PRO.  Putting it
     there might require duplicating a block that cannot be duplicated,
     or in some cases we cannot insert the prologue there at all.  If PRO
     won't do, try again with the immediate dominator of PRO, and so on.

     The blocks that need duplicating are those reachable from PRO but
     not dominated by it.  We keep in BB_WITH a bitmap of the blocks
     reachable from PRO that we already found, and in VEC a stack of
     those we still need to consider (to find successors).  */

  auto_bitmap bb_with;
  bitmap_set_bit (bb_with, pro->index);

  vec<basic_block> vec;
  vec.create (n_basic_blocks_for_fn (cfun));
  vec.quick_push (pro);

  unsigned max_grow_size = get_uncond_jump_length ();
  max_grow_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);

  while (!vec.is_empty () && pro != entry)
    {
      /* First make sure the prologue can actually be inserted before
	 PRO; if not, walk PRO up the dominator tree.  */
      while (pro != entry && !can_get_prologue (pro, prologue_clobbered))
	{
	  pro = get_immediate_dominator (CDI_DOMINATORS, pro);

	  if (bitmap_set_bit (bb_with, pro->index))
	    vec.quick_push (pro);
	}

      /* If BB cannot be duplicated, move PRO up until BB is dominated
	 by PRO (so no copy of BB is needed any more).  */
      basic_block bb = vec.pop ();
      if (!can_dup_for_shrink_wrapping (bb, pro, max_grow_size))
	while (!dominated_by_p (CDI_DOMINATORS, bb, pro))
	  {
	    gcc_assert (pro != entry);

	    pro = get_immediate_dominator (CDI_DOMINATORS, pro);

	    if (bitmap_set_bit (bb_with, pro->index))
	      vec.quick_push (pro);
	  }

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
	    && bitmap_set_bit (bb_with, e->dest->index))
	  vec.quick_push (e->dest);
    }

  if (dump_file)
    fprintf (dump_file, "Avoiding non-duplicatable blocks, PRO is now %d\n",
	     pro->index);

  /* If we can move PRO back without having to duplicate more blocks, do so.
     We do this because putting the prologue earlier is better for scheduling.

     We can move back to a block PRE if every path from PRE will eventually
     need a prologue, that is, PRO is a post-dominator of PRE.  PRE needs
     to dominate every block reachable from itself.  We keep in BB_TMP a
     bitmap of the blocks reachable from PRE that we already found, and in
     VEC a stack of those we still need to consider.

     Any block reachable from PRE is also reachable from all predecessors
     of PRE, so if we find we need to move PRE back further we can leave
     everything not considered so far on the stack.  Any block dominated
     by PRE is also dominated by all other dominators of PRE, so anything
     found good for some PRE does not need to be reconsidered later.

     We don't need to update BB_WITH because none of the new blocks found
     can jump to a block that does not need the prologue.  */

  if (pro != entry)
    {
      calculate_dominance_info (CDI_POST_DOMINATORS);

      auto_bitmap bb_tmp;
      bitmap_copy (bb_tmp, bb_with);
      basic_block last_ok = pro;
      vec.truncate (0);

      while (pro != entry)
	{
	  basic_block pre = get_immediate_dominator (CDI_DOMINATORS, pro);
	  if (!dominated_by_p (CDI_POST_DOMINATORS, pre, pro))
	    break;

	  if (bitmap_set_bit (bb_tmp, pre->index))
	    vec.quick_push (pre);

	  bool ok = true;
	  while (!vec.is_empty ())
	    {
	      if (!dominated_by_p (CDI_DOMINATORS, vec.last (), pre))
		{
		  ok = false;
		  break;
		}

	      basic_block bb = vec.pop ();
	      FOR_EACH_EDGE (e, ei, bb->succs)
		if (bitmap_set_bit (bb_tmp, e->dest->index))
		  vec.quick_push (e->dest);
	    }

	  if (ok && can_get_prologue (pre, prologue_clobbered))
	    last_ok = pre;

	  pro = pre;
	}

      /* LAST_OK is the earliest block we found where the prologue can
	 actually be placed; settle on that.  */
      pro = last_ok;

      free_dominance_info (CDI_POST_DOMINATORS);
    }

  vec.release ();

  if (dump_file)
    fprintf (dump_file, "Bumping back to anticipatable blocks, PRO is now %d\n",
	     pro->index);

  if (pro == entry)
    {
      free_dominance_info (CDI_DOMINATORS);
      return;
    }

  /* Compute what fraction of the frequency and count of the blocks that run
     both with and without prologue are for running with prologue.  This gives
     the correct answer for reducible flow graphs; for irreducible flow graphs
     our profile is messed up beyond repair anyway.  */

  profile_count num = profile_count::zero ();
  profile_count den = profile_count::zero ();

  FOR_EACH_EDGE (e, ei, pro->preds)
    if (!dominated_by_p (CDI_DOMINATORS, e->src, pro))
      {
	if (e->count ().initialized_p ())
	  num += e->count ();
	if (e->src->count.initialized_p ())
	  den += e->src->count;
      }

  /* All is okay, so do it.  */

  crtl->shrink_wrapped = true;
  if (dump_file)
    fprintf (dump_file, "Performing shrink-wrapping.\n");

  /* Copy the blocks that can run both with and without prologue.  The
     originals run with prologue, the copies without.  Store a pointer to
     the copy in the ->aux field of the original.  */

  FOR_EACH_BB_FN (bb, cfun)
    if (bitmap_bit_p (bb_with, bb->index)
	&& !dominated_by_p (CDI_DOMINATORS, bb, pro))
      {
	basic_block dup = duplicate_block (bb, 0, 0);

	bb->aux = dup;

	if (JUMP_P (BB_END (dup)) && !any_condjump_p (BB_END (dup)))
	  emit_barrier_after_bb (dup);

	if (EDGE_COUNT (dup->succs) == 0)
	  emit_barrier_after_bb (dup);

	if (dump_file)
	  fprintf (dump_file, "Duplicated %d to %d\n", bb->index, dup->index);

	/* Scale the original's count by NUM/DEN; the copy gets what is
	   left over, so the two together preserve the block's count.  */
	if (num == profile_count::zero () || den.nonzero_p ())
	  bb->count = bb->count.apply_scale (num, den);
	dup->count -= bb->count;
      }

  /* Now change the edges to point to the copies, where appropriate.  */

  FOR_EACH_BB_FN (bb, cfun)
    if (!dominated_by_p (CDI_DOMINATORS, bb, pro))
      {
	basic_block src = bb;
	if (bitmap_bit_p (bb_with, bb->index))
	  src = (basic_block) bb->aux;

	FOR_EACH_EDGE (e, ei, src->succs)
	  {
	    if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
	      continue;

	    if (bitmap_bit_p (bb_with, e->dest->index)
		&& !dominated_by_p (CDI_DOMINATORS, e->dest, pro))
	      {
		if (dump_file)
		  fprintf (dump_file, "Redirecting edge %d->%d to %d\n",
			   e->src->index, e->dest->index,
			   ((basic_block) e->dest->aux)->index);
		redirect_edge_and_branch_force (e, (basic_block) e->dest->aux);
	      }
	    else if (e->flags & EDGE_FALLTHRU
		     && bitmap_bit_p (bb_with, bb->index))
	      force_nonfallthru (e);
	  }
      }

  /* Also redirect the function entry edge if necessary.  */

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
    if (bitmap_bit_p (bb_with, e->dest->index)
	&& !dominated_by_p (CDI_DOMINATORS, e->dest, pro))
      {
	basic_block split_bb = split_edge (e);
	e = single_succ_edge (split_bb);
	redirect_edge_and_branch_force (e, (basic_block) e->dest->aux);
      }

  /* Make a simple_return for those exits that run without prologue.  */

  FOR_EACH_BB_REVERSE_FN (bb, cfun)
    if (!bitmap_bit_p (bb_with, bb->index))
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
	  handle_simple_exit (e);

  /* Finally, we want a single edge to put the prologue on.  Make a new
     block before the PRO block; the edge between them is the edge we want.
     Then redirect those edges into PRO that come from blocks without the
     prologue, to point to the new block instead.  The new prologue block
     is put at the end of the insn chain.  */

  basic_block new_bb = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
  BB_COPY_PARTITION (new_bb, pro);
  new_bb->count = profile_count::zero ();
  if (dump_file)
    fprintf (dump_file, "Made prologue block %d\n", new_bb->index);

  for (ei = ei_start (pro->preds); (e = ei_safe_edge (ei)); )
    {
      if (bitmap_bit_p (bb_with, e->src->index)
	  || dominated_by_p (CDI_DOMINATORS, e->src, pro))
	{
	  ei_next (&ei);
	  continue;
	}

      new_bb->count += e->count ();

      redirect_edge_and_branch_force (e, new_bb);
      if (dump_file)
	fprintf (dump_file, "Redirected edge from %d\n", e->src->index);
    }

  *entry_edge = make_single_succ_edge (new_bb, pro, EDGE_FALLTHRU);
  force_nonfallthru (*entry_edge);

  free_dominance_info (CDI_DOMINATORS);
}
1011*38fd1498Szrj 
1012*38fd1498Szrj /* Separate shrink-wrapping
1013*38fd1498Szrj 
1014*38fd1498Szrj    Instead of putting all of the prologue and epilogue in one spot, we
1015*38fd1498Szrj    can put parts of it in places where those components are executed less
1016*38fd1498Szrj    frequently.  The following code does this, for prologue and epilogue
1017*38fd1498Szrj    components that can be put in more than one location, and where those
1018*38fd1498Szrj    components can be executed more than once (the epilogue component will
1019*38fd1498Szrj    always be executed before the prologue component is executed a second
1020*38fd1498Szrj    time).
1021*38fd1498Szrj 
1022*38fd1498Szrj    What exactly is a component is target-dependent.  The more usual
1023*38fd1498Szrj    components are simple saves/restores to/from the frame of callee-saved
1024*38fd1498Szrj    registers.  This code treats components abstractly (as an sbitmap),
1025*38fd1498Szrj    letting the target handle all details.
1026*38fd1498Szrj 
1027*38fd1498Szrj    Prologue components are placed in such a way that for every component
1028*38fd1498Szrj    the prologue is executed as infrequently as possible.  We do this by
1029*38fd1498Szrj    walking the dominator tree, comparing the cost of placing a prologue
1030*38fd1498Szrj    component before a block to the sum of costs determined for all subtrees
1031*38fd1498Szrj    of that block.
1032*38fd1498Szrj 
1033*38fd1498Szrj    From this placement, we then determine for each component all blocks
1034*38fd1498Szrj    where at least one of this block's dominators (including itself) will
1035*38fd1498Szrj    get a prologue inserted.  That then is how the components are placed.
1036*38fd1498Szrj    We could place the epilogue components a bit smarter (we can save a
1037*38fd1498Szrj    bit of code size sometimes); this is a possible future improvement.
1038*38fd1498Szrj 
1039*38fd1498Szrj    Prologues and epilogues are preferably placed into a block, either at
1040*38fd1498Szrj    the beginning or end of it, if it is needed for all predecessor resp.
1041*38fd1498Szrj    successor edges; or placed on the edge otherwise.
1042*38fd1498Szrj 
1043*38fd1498Szrj    If the placement of any prologue/epilogue leads to a situation we cannot
1044*38fd1498Szrj    handle (for example, an abnormal edge would need to be split, or some
1045*38fd1498Szrj    targets want to use some specific registers that may not be available
1046*38fd1498Szrj    where we want to put them), separate shrink-wrapping for the components
1047*38fd1498Szrj    in that prologue/epilogue is aborted.  */
1048*38fd1498Szrj 
1049*38fd1498Szrj 
1050*38fd1498Szrj /* Print the sbitmap COMPONENTS to the DUMP_FILE if not empty, with the
1051*38fd1498Szrj    label LABEL.  */
1052*38fd1498Szrj static void
dump_components(const char * label,sbitmap components)1053*38fd1498Szrj dump_components (const char *label, sbitmap components)
1054*38fd1498Szrj {
1055*38fd1498Szrj   if (bitmap_empty_p (components))
1056*38fd1498Szrj     return;
1057*38fd1498Szrj 
1058*38fd1498Szrj   fprintf (dump_file, " [%s", label);
1059*38fd1498Szrj 
1060*38fd1498Szrj   for (unsigned int j = 0; j < components->n_bits; j++)
1061*38fd1498Szrj     if (bitmap_bit_p (components, j))
1062*38fd1498Szrj       fprintf (dump_file, " %u", j);
1063*38fd1498Szrj 
1064*38fd1498Szrj   fprintf (dump_file, "]");
1065*38fd1498Szrj }
1066*38fd1498Szrj 
/* The data we collect for each bb.  Hung off bb->aux and accessed via
   the SW() helper below.  */
struct sw {
  /* What components does this BB need?  */
  sbitmap needs_components;

  /* What components does this BB have?  This is the main decision this
     pass makes.  */
  sbitmap has_components;

  /* The components for which we placed code at the start of the BB (instead
     of on all incoming edges).  */
  sbitmap head_components;

  /* The components for which we placed code at the end of the BB (instead
     of on all outgoing edges).  */
  sbitmap tail_components;

  /* The frequency of executing the prologue for this BB, if a prologue is
     placed on this BB.  This is a pessimistic estimate (no prologue is
     needed for edges from blocks that have the component under consideration
     active already).  */
  gcov_type own_cost;

  /* The frequency of executing the prologue for this BB and all BBs
     dominated by it.  */
  gcov_type total_cost;
};
1094*38fd1498Szrj 
1095*38fd1498Szrj /* A helper function for accessing the pass-specific info.  */
1096*38fd1498Szrj static inline struct sw *
SW(basic_block bb)1097*38fd1498Szrj SW (basic_block bb)
1098*38fd1498Szrj {
1099*38fd1498Szrj   gcc_assert (bb->aux);
1100*38fd1498Szrj   return (struct sw *) bb->aux;
1101*38fd1498Szrj }
1102*38fd1498Szrj 
1103*38fd1498Szrj /* Create the pass-specific data structures for separately shrink-wrapping
1104*38fd1498Szrj    with components COMPONENTS.  */
1105*38fd1498Szrj static void
init_separate_shrink_wrap(sbitmap components)1106*38fd1498Szrj init_separate_shrink_wrap (sbitmap components)
1107*38fd1498Szrj {
1108*38fd1498Szrj   basic_block bb;
1109*38fd1498Szrj   FOR_ALL_BB_FN (bb, cfun)
1110*38fd1498Szrj     {
1111*38fd1498Szrj       bb->aux = xcalloc (1, sizeof (struct sw));
1112*38fd1498Szrj 
1113*38fd1498Szrj       SW (bb)->needs_components = targetm.shrink_wrap.components_for_bb (bb);
1114*38fd1498Szrj 
1115*38fd1498Szrj       /* Mark all basic blocks without successor as needing all components.
1116*38fd1498Szrj 	 This avoids problems in at least cfgcleanup, sel-sched, and
1117*38fd1498Szrj 	 regrename (largely to do with all paths to such a block still
1118*38fd1498Szrj 	 needing the same dwarf CFI info).  */
1119*38fd1498Szrj       if (EDGE_COUNT (bb->succs) == 0)
1120*38fd1498Szrj 	bitmap_copy (SW (bb)->needs_components, components);
1121*38fd1498Szrj 
1122*38fd1498Szrj       if (dump_file)
1123*38fd1498Szrj 	{
1124*38fd1498Szrj 	  fprintf (dump_file, "bb %d components:", bb->index);
1125*38fd1498Szrj 	  dump_components ("has", SW (bb)->needs_components);
1126*38fd1498Szrj 	  fprintf (dump_file, "\n");
1127*38fd1498Szrj 	}
1128*38fd1498Szrj 
1129*38fd1498Szrj       SW (bb)->has_components = sbitmap_alloc (SBITMAP_SIZE (components));
1130*38fd1498Szrj       SW (bb)->head_components = sbitmap_alloc (SBITMAP_SIZE (components));
1131*38fd1498Szrj       SW (bb)->tail_components = sbitmap_alloc (SBITMAP_SIZE (components));
1132*38fd1498Szrj       bitmap_clear (SW (bb)->has_components);
1133*38fd1498Szrj     }
1134*38fd1498Szrj }
1135*38fd1498Szrj 
1136*38fd1498Szrj /* Destroy the pass-specific data.  */
1137*38fd1498Szrj static void
fini_separate_shrink_wrap(void)1138*38fd1498Szrj fini_separate_shrink_wrap (void)
1139*38fd1498Szrj {
1140*38fd1498Szrj   basic_block bb;
1141*38fd1498Szrj   FOR_ALL_BB_FN (bb, cfun)
1142*38fd1498Szrj     if (bb->aux)
1143*38fd1498Szrj       {
1144*38fd1498Szrj 	sbitmap_free (SW (bb)->needs_components);
1145*38fd1498Szrj 	sbitmap_free (SW (bb)->has_components);
1146*38fd1498Szrj 	sbitmap_free (SW (bb)->head_components);
1147*38fd1498Szrj 	sbitmap_free (SW (bb)->tail_components);
1148*38fd1498Szrj 	free (bb->aux);
1149*38fd1498Szrj 	bb->aux = 0;
1150*38fd1498Szrj       }
1151*38fd1498Szrj }
1152*38fd1498Szrj 
/* Place the prologue for component WHICH, in the basic blocks dominated
   by HEAD.  Do a DFS over the dominator tree, and set bit WHICH in the
   HAS_COMPONENTS of a block if either the block has that bit set in
   NEEDS_COMPONENTS, or it is cheaper to place the prologue here than in all
   dominator subtrees separately.  */
static void
place_prologue_for_one_component (unsigned int which, basic_block head)
{
  /* The block we are currently dealing with.  */
  basic_block bb = head;
  /* Is this the first time we visit this block, i.e. have we just gone
     down the tree.  */
  bool first_visit = true;

  /* Walk the dominator tree, visit one block per iteration of this loop.
     Each basic block is visited twice: once before visiting any children
     of the block, and once after visiting all of them (leaf nodes are
     visited only once).  As an optimization, we do not visit subtrees
     that can no longer influence the prologue placement.  */
  for (;;)
    {
      /* First visit of a block: set the (children) cost accumulator to zero;
	 if the block does not have the component itself, walk down.  */
      if (first_visit)
	{
	  /* Initialize the cost.  The cost is the block execution frequency
	     that does not come from backedges.  Calculating this by simply
	     adding the cost of all edges that aren't backedges does not
	     work: this does not always add up to the block frequency at
	     all, and even if it does, rounding error makes for bad
	     decisions.  */
	  SW (bb)->own_cost = bb->count.to_frequency (cfun);

	  /* Subtract the frequency of incoming backedges (edges whose
	     source is dominated by BB), clamping at zero.  */
	  edge e;
	  edge_iterator ei;
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (dominated_by_p (CDI_DOMINATORS, e->src, bb))
	      {
		if (SW (bb)->own_cost > EDGE_FREQUENCY (e))
		  SW (bb)->own_cost -= EDGE_FREQUENCY (e);
		else
		  SW (bb)->own_cost = 0;
	      }

	  SW (bb)->total_cost = 0;

	  if (!bitmap_bit_p (SW (bb)->needs_components, which)
	      && first_dom_son (CDI_DOMINATORS, bb))
	    {
	      bb = first_dom_son (CDI_DOMINATORS, bb);
	      continue;
	    }
	}

      /* If this block does need the component itself, or it is cheaper to
	 put the prologue here than in all the descendants that need it,
	 mark it so.  If this block's immediate post-dominator is dominated
	 by this block, and that needs the prologue, we can put it on this
	 block as well (earlier is better).  */
      if (bitmap_bit_p (SW (bb)->needs_components, which)
	  || SW (bb)->total_cost > SW (bb)->own_cost)
	{
	  SW (bb)->total_cost = SW (bb)->own_cost;
	  bitmap_set_bit (SW (bb)->has_components, which);
	}
      else
	{
	  basic_block kid = get_immediate_dominator (CDI_POST_DOMINATORS, bb);
	  if (dominated_by_p (CDI_DOMINATORS, kid, bb)
	      && bitmap_bit_p (SW (kid)->has_components, which))
	    {
	      SW (bb)->total_cost = SW (bb)->own_cost;
	      bitmap_set_bit (SW (bb)->has_components, which);
	    }
	}

      /* We are back where we started, so we are done now.  */
      if (bb == head)
	return;

      /* We now know the cost of the subtree rooted at the current block.
	 Accumulate this cost in the parent.  */
      basic_block parent = get_immediate_dominator (CDI_DOMINATORS, bb);
      SW (parent)->total_cost += SW (bb)->total_cost;

      /* Don't walk the tree down unless necessary: once the parent's
	 accumulated subtree cost exceeds its own cost, placing the
	 prologue on the parent is already the cheaper choice and the
	 remaining siblings cannot change that.  */
      if (next_dom_son (CDI_DOMINATORS, bb)
          && SW (parent)->total_cost <= SW (parent)->own_cost)
	{
	  bb = next_dom_son (CDI_DOMINATORS, bb);
	  first_visit = true;
	}
      else
	{
	  bb = parent;
	  first_visit = false;
	}
    }
}
1252*38fd1498Szrj 
1253*38fd1498Szrj /* Set HAS_COMPONENTS in every block to the maximum it can be set to without
1254*38fd1498Szrj    setting it on any path from entry to exit where it was not already set
1255*38fd1498Szrj    somewhere (or, for blocks that have no path to the exit, consider only
1256*38fd1498Szrj    paths from the entry to the block itself).  */
1257*38fd1498Szrj static void
spread_components(sbitmap components)1258*38fd1498Szrj spread_components (sbitmap components)
1259*38fd1498Szrj {
1260*38fd1498Szrj   basic_block entry_block = ENTRY_BLOCK_PTR_FOR_FN (cfun);
1261*38fd1498Szrj   basic_block exit_block = EXIT_BLOCK_PTR_FOR_FN (cfun);
1262*38fd1498Szrj 
1263*38fd1498Szrj   /* A stack of all blocks left to consider, and a bitmap of all blocks
1264*38fd1498Szrj      on that stack.  */
1265*38fd1498Szrj   vec<basic_block> todo;
1266*38fd1498Szrj   todo.create (n_basic_blocks_for_fn (cfun));
1267*38fd1498Szrj   auto_bitmap seen;
1268*38fd1498Szrj 
1269*38fd1498Szrj   auto_sbitmap old (SBITMAP_SIZE (components));
1270*38fd1498Szrj 
1271*38fd1498Szrj   /* Find for every block the components that are *not* needed on some path
1272*38fd1498Szrj      from the entry to that block.  Do this with a flood fill from the entry
1273*38fd1498Szrj      block.  Every block can be visited at most as often as the number of
1274*38fd1498Szrj      components (plus one), and usually much less often.  */
1275*38fd1498Szrj 
1276*38fd1498Szrj   if (dump_file)
1277*38fd1498Szrj     fprintf (dump_file, "Spreading down...\n");
1278*38fd1498Szrj 
1279*38fd1498Szrj   basic_block bb;
1280*38fd1498Szrj   FOR_ALL_BB_FN (bb, cfun)
1281*38fd1498Szrj     bitmap_clear (SW (bb)->head_components);
1282*38fd1498Szrj 
1283*38fd1498Szrj   bitmap_copy (SW (entry_block)->head_components, components);
1284*38fd1498Szrj 
1285*38fd1498Szrj   edge e;
1286*38fd1498Szrj   edge_iterator ei;
1287*38fd1498Szrj 
1288*38fd1498Szrj   todo.quick_push (single_succ (entry_block));
1289*38fd1498Szrj   bitmap_set_bit (seen, single_succ (entry_block)->index);
1290*38fd1498Szrj   while (!todo.is_empty ())
1291*38fd1498Szrj     {
1292*38fd1498Szrj       bb = todo.pop ();
1293*38fd1498Szrj 
1294*38fd1498Szrj       bitmap_copy (old, SW (bb)->head_components);
1295*38fd1498Szrj 
1296*38fd1498Szrj       FOR_EACH_EDGE (e, ei, bb->preds)
1297*38fd1498Szrj 	bitmap_ior (SW (bb)->head_components, SW (bb)->head_components,
1298*38fd1498Szrj 		    SW (e->src)->head_components);
1299*38fd1498Szrj 
1300*38fd1498Szrj       bitmap_and_compl (SW (bb)->head_components, SW (bb)->head_components,
1301*38fd1498Szrj 			SW (bb)->has_components);
1302*38fd1498Szrj 
1303*38fd1498Szrj       if (!bitmap_equal_p (old, SW (bb)->head_components))
1304*38fd1498Szrj 	FOR_EACH_EDGE (e, ei, bb->succs)
1305*38fd1498Szrj 	  if (bitmap_set_bit (seen, e->dest->index))
1306*38fd1498Szrj 	    todo.quick_push (e->dest);
1307*38fd1498Szrj 
1308*38fd1498Szrj       bitmap_clear_bit (seen, bb->index);
1309*38fd1498Szrj     }
1310*38fd1498Szrj 
1311*38fd1498Szrj   /* Find for every block the components that are *not* needed on some reverse
1312*38fd1498Szrj      path from the exit to that block.  */
1313*38fd1498Szrj 
1314*38fd1498Szrj   if (dump_file)
1315*38fd1498Szrj     fprintf (dump_file, "Spreading up...\n");
1316*38fd1498Szrj 
1317*38fd1498Szrj   /* First, mark all blocks not reachable from the exit block as not needing
1318*38fd1498Szrj      any component on any path to the exit.  Mark everything, and then clear
1319*38fd1498Szrj      again by a flood fill.  */
1320*38fd1498Szrj 
1321*38fd1498Szrj   FOR_ALL_BB_FN (bb, cfun)
1322*38fd1498Szrj     bitmap_copy (SW (bb)->tail_components, components);
1323*38fd1498Szrj 
1324*38fd1498Szrj   FOR_EACH_EDGE (e, ei, exit_block->preds)
1325*38fd1498Szrj     {
1326*38fd1498Szrj       todo.quick_push (e->src);
1327*38fd1498Szrj       bitmap_set_bit (seen, e->src->index);
1328*38fd1498Szrj     }
1329*38fd1498Szrj 
1330*38fd1498Szrj   while (!todo.is_empty ())
1331*38fd1498Szrj     {
1332*38fd1498Szrj       bb = todo.pop ();
1333*38fd1498Szrj 
1334*38fd1498Szrj       if (!bitmap_empty_p (SW (bb)->tail_components))
1335*38fd1498Szrj 	FOR_EACH_EDGE (e, ei, bb->preds)
1336*38fd1498Szrj 	  if (bitmap_set_bit (seen, e->src->index))
1337*38fd1498Szrj 	    todo.quick_push (e->src);
1338*38fd1498Szrj 
1339*38fd1498Szrj       bitmap_clear (SW (bb)->tail_components);
1340*38fd1498Szrj 
1341*38fd1498Szrj       bitmap_clear_bit (seen, bb->index);
1342*38fd1498Szrj     }
1343*38fd1498Szrj 
1344*38fd1498Szrj   /* And then, flood fill backwards to find for every block the components
1345*38fd1498Szrj      not needed on some path to the exit.  */
1346*38fd1498Szrj 
1347*38fd1498Szrj   bitmap_copy (SW (exit_block)->tail_components, components);
1348*38fd1498Szrj 
1349*38fd1498Szrj   FOR_EACH_EDGE (e, ei, exit_block->preds)
1350*38fd1498Szrj     {
1351*38fd1498Szrj       todo.quick_push (e->src);
1352*38fd1498Szrj       bitmap_set_bit (seen, e->src->index);
1353*38fd1498Szrj     }
1354*38fd1498Szrj 
1355*38fd1498Szrj   while (!todo.is_empty ())
1356*38fd1498Szrj     {
1357*38fd1498Szrj       bb = todo.pop ();
1358*38fd1498Szrj 
1359*38fd1498Szrj       bitmap_copy (old, SW (bb)->tail_components);
1360*38fd1498Szrj 
1361*38fd1498Szrj       FOR_EACH_EDGE (e, ei, bb->succs)
1362*38fd1498Szrj 	bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components,
1363*38fd1498Szrj 		    SW (e->dest)->tail_components);
1364*38fd1498Szrj 
1365*38fd1498Szrj       bitmap_and_compl (SW (bb)->tail_components, SW (bb)->tail_components,
1366*38fd1498Szrj 			SW (bb)->has_components);
1367*38fd1498Szrj 
1368*38fd1498Szrj       if (!bitmap_equal_p (old, SW (bb)->tail_components))
1369*38fd1498Szrj 	FOR_EACH_EDGE (e, ei, bb->preds)
1370*38fd1498Szrj 	  if (bitmap_set_bit (seen, e->src->index))
1371*38fd1498Szrj 	    todo.quick_push (e->src);
1372*38fd1498Szrj 
1373*38fd1498Szrj       bitmap_clear_bit (seen, bb->index);
1374*38fd1498Szrj     }
1375*38fd1498Szrj 
1376*38fd1498Szrj   todo.release ();
1377*38fd1498Szrj 
1378*38fd1498Szrj   /* Finally, mark everything not not needed both forwards and backwards.  */
1379*38fd1498Szrj 
1380*38fd1498Szrj   FOR_EACH_BB_FN (bb, cfun)
1381*38fd1498Szrj     {
1382*38fd1498Szrj       bitmap_and (SW (bb)->head_components, SW (bb)->head_components,
1383*38fd1498Szrj 		  SW (bb)->tail_components);
1384*38fd1498Szrj       bitmap_and_compl (SW (bb)->has_components, components,
1385*38fd1498Szrj 			SW (bb)->head_components);
1386*38fd1498Szrj     }
1387*38fd1498Szrj 
1388*38fd1498Szrj   FOR_ALL_BB_FN (bb, cfun)
1389*38fd1498Szrj     {
1390*38fd1498Szrj       if (dump_file)
1391*38fd1498Szrj 	{
1392*38fd1498Szrj 	  fprintf (dump_file, "bb %d components:", bb->index);
1393*38fd1498Szrj 	  dump_components ("has", SW (bb)->has_components);
1394*38fd1498Szrj 	  fprintf (dump_file, "\n");
1395*38fd1498Szrj 	}
1396*38fd1498Szrj     }
1397*38fd1498Szrj }
1398*38fd1498Szrj 
/* If we cannot handle placing some component's prologues or epilogues where
   we decided we should place them, unmark that component in COMPONENTS so
   that it is not wrapped separately.

   A prologue (resp. epilogue) for a component is wanted on edge E when the
   component is active in E->dest but not E->src (resp. active in E->src but
   not E->dest), as recorded in the SW ()->has_components bitmaps.  Such
   code can only go on an edge we can insert code on, so components wanted
   on an unsplittable abnormal edge are disqualified.  The target is also
   given a veto over every edge/component pair via its
   disqualify_components hook.  */
static void
disqualify_problematic_components (sbitmap components)
{
  /* Scratch bitmaps: the components wanting a prologue resp. epilogue
     on the edge currently under consideration.  */
  auto_sbitmap pro (SBITMAP_SIZE (components));
  auto_sbitmap epi (SBITMAP_SIZE (components));

  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    {
      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  /* Find which components we want pro/epilogues for here.  */
	  bitmap_and_compl (epi, SW (e->src)->has_components,
			    SW (e->dest)->has_components);
	  bitmap_and_compl (pro, SW (e->dest)->has_components,
			    SW (e->src)->has_components);

	  /* Ask the target what it thinks about things.  */
	  if (!bitmap_empty_p (epi))
	    targetm.shrink_wrap.disqualify_components (components, e, epi,
						       false);
	  if (!bitmap_empty_p (pro))
	    targetm.shrink_wrap.disqualify_components (components, e, pro,
						       true);

	  /* If this edge doesn't need splitting, we're fine.  */
	  if (single_pred_p (e->dest)
	      && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
	    continue;

	  /* If the edge can be split, that is fine too.  */
	  if ((e->flags & EDGE_ABNORMAL) == 0)
	    continue;

	  /* We also can handle sibcalls.  */
	  if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
	    {
	      gcc_assert (e->flags & EDGE_SIBCALL);
	      continue;
	    }

	  /* Remove from consideration those components we would need
	     pro/epilogues for on edges where we cannot insert them.  */
	  bitmap_and_compl (components, components, epi);
	  bitmap_and_compl (components, components, pro);

	  /* NOTE(review): COMPONENTS was already pruned just above, so
	     !bitmap_subset_p (epi, components) below is equivalent to
	     "EPI is non-empty" — the dump fires for every
	     disqualification made on this edge.  Likewise for PRO.  */
	  if (dump_file && !bitmap_subset_p (epi, components))
	    {
	      fprintf (dump_file, "  BAD epi %d->%d", e->src->index,
		       e->dest->index);
	      if (e->flags & EDGE_EH)
		fprintf (dump_file, " for EH");
	      dump_components ("epi", epi);
	      fprintf (dump_file, "\n");
	    }

	  if (dump_file && !bitmap_subset_p (pro, components))
	    {
	      fprintf (dump_file, "  BAD pro %d->%d", e->src->index,
		       e->dest->index);
	      if (e->flags & EDGE_EH)
		fprintf (dump_file, " for EH");
	      dump_components ("pro", pro);
	      fprintf (dump_file, "\n");
	    }
	}
    }
}
1472*38fd1498Szrj 
/* Place code for prologues and epilogues for COMPONENTS where we can put
   that code at the start of basic blocks: a component's pro/epilogue can go
   at the head of BB only if it is wanted on *every* incoming edge (and no
   incoming edge is abnormal).  SW (bb)->head_components records what was
   placed here so later placement passes do not emit it again.  */
static void
emit_common_heads_for_components (sbitmap components)
{
  /* Scratch bitmaps for the components wanted on all of the current
     block's incoming edges.  */
  auto_sbitmap pro (SBITMAP_SIZE (components));
  auto_sbitmap epi (SBITMAP_SIZE (components));
  auto_sbitmap tmp (SBITMAP_SIZE (components));

  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    bitmap_clear (SW (bb)->head_components);

  FOR_EACH_BB_FN (bb, cfun)
    {
      /* Find which prologue resp. epilogue components are needed for all
	 predecessor edges to this block.  */

      /* First, select all possible components.  */
      bitmap_copy (epi, components);
      bitmap_copy (pro, components);

      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->preds)
	{
	  /* We cannot insert code on an abnormal edge, so if any incoming
	     edge is abnormal nothing can go at this block's head.  */
	  if (e->flags & EDGE_ABNORMAL)
	    {
	      bitmap_clear (epi);
	      bitmap_clear (pro);
	      break;
	    }

	  /* Deselect those epilogue components that should not be inserted
	     for this edge.  */
	  bitmap_and_compl (tmp, SW (e->src)->has_components,
			    SW (e->dest)->has_components);
	  bitmap_and (epi, epi, tmp);

	  /* Similar, for the prologue.  */
	  bitmap_and_compl (tmp, SW (e->dest)->has_components,
			    SW (e->src)->has_components);
	  bitmap_and (pro, pro, tmp);
	}

      /* EPI and PRO now hold the intersection over all incoming edges of
	 the components wanted on each edge.  */
      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
	fprintf (dump_file, "  bb %d", bb->index);

      if (dump_file && !bitmap_empty_p (epi))
	dump_components ("epi", epi);
      if (dump_file && !bitmap_empty_p (pro))
	dump_components ("pro", pro);

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
	fprintf (dump_file, "\n");

      /* Place code after the BB note.  */
      if (!bitmap_empty_p (pro))
	{
	  start_sequence ();
	  targetm.shrink_wrap.emit_prologue_components (pro);
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  record_prologue_seq (seq);

	  emit_insn_after (seq, bb_note (bb));

	  bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, pro);
	}

      if (!bitmap_empty_p (epi))
	{
	  start_sequence ();
	  targetm.shrink_wrap.emit_epilogue_components (epi);
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  record_epilogue_seq (seq);

	  /* This is also inserted directly after the bb note, so the
	     epilogue code ends up ahead of the prologue sequence emitted
	     just above.  */
	  emit_insn_after (seq, bb_note (bb));

	  bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, epi);
	}
    }
}
1557*38fd1498Szrj 
/* Place code for prologues and epilogues for COMPONENTS where we can put
   that code at the end of basic blocks: a component's pro/epilogue can go
   at the tail of BB only if it is wanted on *every* outgoing edge, was not
   already placed at the head of the successor, and the block does not end
   in a control flow insn we cannot emit around.  SW (bb)->tail_components
   records what was placed here.  */
static void
emit_common_tails_for_components (sbitmap components)
{
  /* Scratch bitmaps for the components wanted on all of the current
     block's outgoing edges.  */
  auto_sbitmap pro (SBITMAP_SIZE (components));
  auto_sbitmap epi (SBITMAP_SIZE (components));
  auto_sbitmap tmp (SBITMAP_SIZE (components));

  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    bitmap_clear (SW (bb)->tail_components);

  FOR_EACH_BB_FN (bb, cfun)
    {
      /* Find which prologue resp. epilogue components are needed for all
	 successor edges from this block.  */
      if (EDGE_COUNT (bb->succs) == 0)
	continue;

      /* First, select all possible components.  */
      bitmap_copy (epi, components);
      bitmap_copy (pro, components);

      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  /* We cannot insert code on an abnormal edge, so if any outgoing
	     edge is abnormal nothing can go at this block's tail.  */
	  if (e->flags & EDGE_ABNORMAL)
	    {
	      bitmap_clear (epi);
	      bitmap_clear (pro);
	      break;
	    }

	  /* Deselect those epilogue components that should not be inserted
	     for this edge, and also those that are already put at the head
	     of the successor block.  */
	  bitmap_and_compl (tmp, SW (e->src)->has_components,
			    SW (e->dest)->has_components);
	  bitmap_and_compl (tmp, tmp, SW (e->dest)->head_components);
	  bitmap_and (epi, epi, tmp);

	  /* Similarly, for the prologue.  */
	  bitmap_and_compl (tmp, SW (e->dest)->has_components,
			    SW (e->src)->has_components);
	  bitmap_and_compl (tmp, tmp, SW (e->dest)->head_components);
	  bitmap_and (pro, pro, tmp);
	}

      /* If the last insn of this block is a control flow insn we cannot
	 put anything after it.  We can put our code before it instead,
	 but only if that jump insn is a simple jump.  */
      rtx_insn *last_insn = BB_END (bb);
      if (control_flow_insn_p (last_insn) && !simplejump_p (last_insn))
	{
	  bitmap_clear (epi);
	  bitmap_clear (pro);
	}

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
	fprintf (dump_file, "  bb %d", bb->index);

      if (dump_file && !bitmap_empty_p (epi))
	dump_components ("epi", epi);
      if (dump_file && !bitmap_empty_p (pro))
	dump_components ("pro", pro);

      if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
	fprintf (dump_file, "\n");

      /* Put the code at the end of the BB, but before any final jump.  */
      if (!bitmap_empty_p (epi))
	{
	  start_sequence ();
	  targetm.shrink_wrap.emit_epilogue_components (epi);
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  record_epilogue_seq (seq);

	  if (control_flow_insn_p (last_insn))
	    emit_insn_before (seq, last_insn);
	  else
	    emit_insn_after (seq, last_insn);

	  bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, epi);
	}

      if (!bitmap_empty_p (pro))
	{
	  start_sequence ();
	  targetm.shrink_wrap.emit_prologue_components (pro);
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  record_prologue_seq (seq);

	  /* Emitted at the same point as the epilogue sequence above, so
	     the prologue code ends up after it (and still before any
	     final jump).  */
	  if (control_flow_insn_p (last_insn))
	    emit_insn_before (seq, last_insn);
	  else
	    emit_insn_after (seq, last_insn);

	  bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, pro);
	}
    }
}
1663*38fd1498Szrj 
/* Place prologues and epilogues for COMPONENTS on edges, if we haven't already
   placed them inside blocks directly (as recorded in the head_components and
   tail_components bitmaps filled in by the two functions above).  Edge
   insertions are deferred via insert_insn_on_edge and materialized by the
   final commit_edge_insertions call, except for sibcall and
   fallthru-to-exit edges which are handled specially.  */
static void
insert_prologue_epilogue_for_components (sbitmap components)
{
  /* Scratch bitmaps: components still needing a prologue resp. epilogue
     on the edge currently under consideration.  */
  auto_sbitmap pro (SBITMAP_SIZE (components));
  auto_sbitmap epi (SBITMAP_SIZE (components));

  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    {
      /* NOTE(review): bb->aux presumably holds this pass's per-block SW ()
	 state; blocks without it need no processing.  Confirm against
	 init_separate_shrink_wrap.  */
      if (!bb->aux)
	continue;

      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  /* Find which pro/epilogue components are needed on this edge.  */
	  bitmap_and_compl (epi, SW (e->src)->has_components,
			    SW (e->dest)->has_components);
	  bitmap_and_compl (pro, SW (e->dest)->has_components,
			    SW (e->src)->has_components);
	  bitmap_and (epi, epi, components);
	  bitmap_and (pro, pro, components);

	  /* Deselect those we already have put at the head or tail of the
	     edge's dest resp. src.  */
	  bitmap_and_compl (epi, epi, SW (e->dest)->head_components);
	  bitmap_and_compl (pro, pro, SW (e->dest)->head_components);
	  bitmap_and_compl (epi, epi, SW (e->src)->tail_components);
	  bitmap_and_compl (pro, pro, SW (e->src)->tail_components);

	  if (!bitmap_empty_p (epi) || !bitmap_empty_p (pro))
	    {
	      if (dump_file)
		{
		  fprintf (dump_file, "  %d->%d", e->src->index,
			   e->dest->index);
		  dump_components ("epi", epi);
		  dump_components ("pro", pro);
		  if (e->flags & EDGE_SIBCALL)
		    fprintf (dump_file, "  (SIBCALL)");
		  else if (e->flags & EDGE_ABNORMAL)
		    fprintf (dump_file, "  (ABNORMAL)");
		  fprintf (dump_file, "\n");
		}

	      /* Put the epilogue components in place.  */
	      start_sequence ();
	      targetm.shrink_wrap.emit_epilogue_components (epi);
	      rtx_insn *seq = get_insns ();
	      end_sequence ();
	      record_epilogue_seq (seq);

	      if (e->flags & EDGE_SIBCALL)
		{
		  /* On a sibcall edge the epilogue goes right before the
		     sibling call insn itself.  */
		  gcc_assert (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun));

		  rtx_insn *insn = BB_END (e->src);
		  gcc_assert (CALL_P (insn) && SIBLING_CALL_P (insn));
		  emit_insn_before (seq, insn);
		}
	      else if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
		{
		  /* A fallthru edge into the exit block is split so the
		     epilogue code gets its own block.  */
		  gcc_assert (e->flags & EDGE_FALLTHRU);
		  basic_block new_bb = split_edge (e);
		  emit_insn_after (seq, BB_END (new_bb));
		}
	      else
		insert_insn_on_edge (seq, e);

	      /* Put the prologue components in place.  */
	      start_sequence ();
	      targetm.shrink_wrap.emit_prologue_components (pro);
	      seq = get_insns ();
	      end_sequence ();
	      record_prologue_seq (seq);

	      insert_insn_on_edge (seq, e);
	    }
	}
    }

  /* Materialize all deferred edge insertions.  */
  commit_edge_insertions ();
}
1750*38fd1498Szrj 
1751*38fd1498Szrj /* The main entry point to this subpass.  FIRST_BB is where the prologue
1752*38fd1498Szrj    would be normally put.  */
1753*38fd1498Szrj void
try_shrink_wrapping_separate(basic_block first_bb)1754*38fd1498Szrj try_shrink_wrapping_separate (basic_block first_bb)
1755*38fd1498Szrj {
1756*38fd1498Szrj   if (HAVE_cc0)
1757*38fd1498Szrj     return;
1758*38fd1498Szrj 
1759*38fd1498Szrj   if (!(SHRINK_WRAPPING_ENABLED
1760*38fd1498Szrj 	&& flag_shrink_wrap_separate
1761*38fd1498Szrj 	&& optimize_function_for_speed_p (cfun)
1762*38fd1498Szrj 	&& targetm.shrink_wrap.get_separate_components))
1763*38fd1498Szrj     return;
1764*38fd1498Szrj 
1765*38fd1498Szrj   /* We don't handle "strange" functions.  */
1766*38fd1498Szrj   if (cfun->calls_alloca
1767*38fd1498Szrj       || cfun->calls_setjmp
1768*38fd1498Szrj       || cfun->can_throw_non_call_exceptions
1769*38fd1498Szrj       || crtl->calls_eh_return
1770*38fd1498Szrj       || crtl->has_nonlocal_goto
1771*38fd1498Szrj       || crtl->saves_all_registers)
1772*38fd1498Szrj     return;
1773*38fd1498Szrj 
1774*38fd1498Szrj   /* Ask the target what components there are.  If it returns NULL, don't
1775*38fd1498Szrj      do anything.  */
1776*38fd1498Szrj   sbitmap components = targetm.shrink_wrap.get_separate_components ();
1777*38fd1498Szrj   if (!components)
1778*38fd1498Szrj     return;
1779*38fd1498Szrj 
1780*38fd1498Szrj   /* We need LIVE info, not defining anything in the entry block and not
1781*38fd1498Szrj      using anything in the exit block.  A block then needs a component if
1782*38fd1498Szrj      the register for that component is in the IN or GEN or KILL set for
1783*38fd1498Szrj      that block.  */
1784*38fd1498Szrj   df_scan->local_flags |= DF_SCAN_EMPTY_ENTRY_EXIT;
1785*38fd1498Szrj   df_update_entry_exit_and_calls ();
1786*38fd1498Szrj   df_live_add_problem ();
1787*38fd1498Szrj   df_live_set_all_dirty ();
1788*38fd1498Szrj   df_analyze ();
1789*38fd1498Szrj 
1790*38fd1498Szrj   calculate_dominance_info (CDI_DOMINATORS);
1791*38fd1498Szrj   calculate_dominance_info (CDI_POST_DOMINATORS);
1792*38fd1498Szrj 
1793*38fd1498Szrj   init_separate_shrink_wrap (components);
1794*38fd1498Szrj 
1795*38fd1498Szrj   sbitmap_iterator sbi;
1796*38fd1498Szrj   unsigned int j;
1797*38fd1498Szrj   EXECUTE_IF_SET_IN_BITMAP (components, 0, j, sbi)
1798*38fd1498Szrj     place_prologue_for_one_component (j, first_bb);
1799*38fd1498Szrj 
1800*38fd1498Szrj   spread_components (components);
1801*38fd1498Szrj 
1802*38fd1498Szrj   disqualify_problematic_components (components);
1803*38fd1498Szrj 
1804*38fd1498Szrj   /* Don't separately shrink-wrap anything where the "main" prologue will
1805*38fd1498Szrj      go; the target code can often optimize things if it is presented with
1806*38fd1498Szrj      all components together (say, if it generates store-multiple insns).  */
1807*38fd1498Szrj   bitmap_and_compl (components, components, SW (first_bb)->has_components);
1808*38fd1498Szrj 
1809*38fd1498Szrj   if (bitmap_empty_p (components))
1810*38fd1498Szrj     {
1811*38fd1498Szrj       if (dump_file)
1812*38fd1498Szrj 	fprintf (dump_file, "Not wrapping anything separately.\n");
1813*38fd1498Szrj     }
1814*38fd1498Szrj   else
1815*38fd1498Szrj     {
1816*38fd1498Szrj       if (dump_file)
1817*38fd1498Szrj 	{
1818*38fd1498Szrj 	  fprintf (dump_file, "The components we wrap separately are");
1819*38fd1498Szrj 	  dump_components ("sep", components);
1820*38fd1498Szrj 	  fprintf (dump_file, "\n");
1821*38fd1498Szrj 
1822*38fd1498Szrj 	  fprintf (dump_file, "... Inserting common heads...\n");
1823*38fd1498Szrj 	}
1824*38fd1498Szrj 
1825*38fd1498Szrj       emit_common_heads_for_components (components);
1826*38fd1498Szrj 
1827*38fd1498Szrj       if (dump_file)
1828*38fd1498Szrj 	fprintf (dump_file, "... Inserting common tails...\n");
1829*38fd1498Szrj 
1830*38fd1498Szrj       emit_common_tails_for_components (components);
1831*38fd1498Szrj 
1832*38fd1498Szrj       if (dump_file)
1833*38fd1498Szrj 	fprintf (dump_file, "... Inserting the more difficult ones...\n");
1834*38fd1498Szrj 
1835*38fd1498Szrj       insert_prologue_epilogue_for_components (components);
1836*38fd1498Szrj 
1837*38fd1498Szrj       if (dump_file)
1838*38fd1498Szrj 	fprintf (dump_file, "... Done.\n");
1839*38fd1498Szrj 
1840*38fd1498Szrj       targetm.shrink_wrap.set_handled_components (components);
1841*38fd1498Szrj 
1842*38fd1498Szrj       crtl->shrink_wrapped_separate = true;
1843*38fd1498Szrj     }
1844*38fd1498Szrj 
1845*38fd1498Szrj   fini_separate_shrink_wrap ();
1846*38fd1498Szrj 
1847*38fd1498Szrj   sbitmap_free (components);
1848*38fd1498Szrj   free_dominance_info (CDI_DOMINATORS);
1849*38fd1498Szrj   free_dominance_info (CDI_POST_DOMINATORS);
1850*38fd1498Szrj 
1851*38fd1498Szrj   /* All done.  */
1852*38fd1498Szrj   df_scan->local_flags &= ~DF_SCAN_EMPTY_ENTRY_EXIT;
1853*38fd1498Szrj   df_update_entry_exit_and_calls ();
1854*38fd1498Szrj   df_live_set_all_dirty ();
1855*38fd1498Szrj   df_analyze ();
1856*38fd1498Szrj }
1857