xref: /dragonfly/contrib/gcc-8.0/gcc/ira-emit.c (revision 38fd1498)
1*38fd1498Szrj /* Integrated Register Allocator.  Changing code and generating moves.
2*38fd1498Szrj    Copyright (C) 2006-2018 Free Software Foundation, Inc.
3*38fd1498Szrj    Contributed by Vladimir Makarov <vmakarov@redhat.com>.
4*38fd1498Szrj 
5*38fd1498Szrj This file is part of GCC.
6*38fd1498Szrj 
7*38fd1498Szrj GCC is free software; you can redistribute it and/or modify it under
8*38fd1498Szrj the terms of the GNU General Public License as published by the Free
9*38fd1498Szrj Software Foundation; either version 3, or (at your option) any later
10*38fd1498Szrj version.
11*38fd1498Szrj 
12*38fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13*38fd1498Szrj WARRANTY; without even the implied warranty of MERCHANTABILITY or
14*38fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15*38fd1498Szrj for more details.
16*38fd1498Szrj 
17*38fd1498Szrj You should have received a copy of the GNU General Public License
18*38fd1498Szrj along with GCC; see the file COPYING3.  If not see
19*38fd1498Szrj <http://www.gnu.org/licenses/>.  */
20*38fd1498Szrj 
/* When we have more than one region, we need to change the original RTL
22*38fd1498Szrj    code after coloring.  Let us consider two allocnos representing the
23*38fd1498Szrj    same pseudo-register outside and inside a region respectively.
24*38fd1498Szrj    They can get different hard-registers.  The reload pass works on
   pseudo-register basis and there is no way to tell the reload pass that a
   pseudo could be in different registers and it is even more
27*38fd1498Szrj    difficult to say in what places of the code the pseudo should have
28*38fd1498Szrj    particular hard-registers.  So in this case IRA has to create and
29*38fd1498Szrj    use a new pseudo-register inside the region and adds code to move
30*38fd1498Szrj    allocno values on the region's borders.  This is done by the code
31*38fd1498Szrj    in this file.
32*38fd1498Szrj 
   The code makes a top-down traversal of the regions and generates new
34*38fd1498Szrj    pseudos and the move code on the region borders.  In some
35*38fd1498Szrj    complicated cases IRA can create a new pseudo used temporarily to
36*38fd1498Szrj    move allocno values when a swap of values stored in two
37*38fd1498Szrj    hard-registers is needed (e.g. two allocnos representing different
38*38fd1498Szrj    pseudos outside region got respectively hard registers 1 and 2 and
39*38fd1498Szrj    the corresponding allocnos inside the region got respectively hard
40*38fd1498Szrj    registers 2 and 1).  At this stage, the new pseudo is marked as
41*38fd1498Szrj    spilled.
42*38fd1498Szrj 
43*38fd1498Szrj    IRA still creates the pseudo-register and the moves on the region
44*38fd1498Szrj    borders even when the both corresponding allocnos were assigned to
45*38fd1498Szrj    the same hard-register.  It is done because, if the reload pass for
46*38fd1498Szrj    some reason spills a pseudo-register representing the original
47*38fd1498Szrj    pseudo outside or inside the region, the effect will be smaller
48*38fd1498Szrj    because another pseudo will still be in the hard-register.  In most
   cases, this is better than spilling the original pseudo in its
50*38fd1498Szrj    whole live-range.  If reload does not change the allocation for the
51*38fd1498Szrj    two pseudo-registers, the trivial move will be removed by
52*38fd1498Szrj    post-reload optimizations.
53*38fd1498Szrj 
54*38fd1498Szrj    IRA does not generate a new pseudo and moves for the allocno values
55*38fd1498Szrj    if the both allocnos representing an original pseudo inside and
56*38fd1498Szrj    outside region assigned to the same hard register when the register
57*38fd1498Szrj    pressure in the region for the corresponding pressure class is less
58*38fd1498Szrj    than number of available hard registers for given pressure class.
59*38fd1498Szrj 
60*38fd1498Szrj    IRA also does some optimizations to remove redundant moves which is
61*38fd1498Szrj    transformed into stores by the reload pass on CFG edges
62*38fd1498Szrj    representing exits from the region.
63*38fd1498Szrj 
64*38fd1498Szrj    IRA tries to reduce duplication of code generated on CFG edges
65*38fd1498Szrj    which are enters and exits to/from regions by moving some code to
66*38fd1498Szrj    the edge sources or destinations when it is possible.  */
67*38fd1498Szrj 
68*38fd1498Szrj #include "config.h"
69*38fd1498Szrj #include "system.h"
70*38fd1498Szrj #include "coretypes.h"
71*38fd1498Szrj #include "backend.h"
72*38fd1498Szrj #include "rtl.h"
73*38fd1498Szrj #include "tree.h"
74*38fd1498Szrj #include "predict.h"
75*38fd1498Szrj #include "df.h"
76*38fd1498Szrj #include "insn-config.h"
77*38fd1498Szrj #include "regs.h"
78*38fd1498Szrj #include "memmodel.h"
79*38fd1498Szrj #include "ira.h"
80*38fd1498Szrj #include "ira-int.h"
81*38fd1498Szrj #include "cfgrtl.h"
82*38fd1498Szrj #include "cfgbuild.h"
83*38fd1498Szrj #include "expr.h"
84*38fd1498Szrj #include "reload.h"
85*38fd1498Szrj #include "cfgloop.h"
86*38fd1498Szrj 
87*38fd1498Szrj 
88*38fd1498Szrj /* Data used to emit live range split insns and to flattening IR.  */
89*38fd1498Szrj ira_emit_data_t ira_allocno_emit_data;
90*38fd1498Szrj 
91*38fd1498Szrj /* Definitions for vectors of pointers.  */
92*38fd1498Szrj typedef void *void_p;
93*38fd1498Szrj 
94*38fd1498Szrj /* Pointers to data allocated for allocnos being created during
95*38fd1498Szrj    emitting.  Usually there are quite few such allocnos because they
96*38fd1498Szrj    are created only for resolving loop in register shuffling.  */
97*38fd1498Szrj static vec<void_p> new_allocno_emit_data_vec;
98*38fd1498Szrj 
99*38fd1498Szrj /* Allocate and initiate the emit data.  */
100*38fd1498Szrj void
ira_initiate_emit_data(void)101*38fd1498Szrj ira_initiate_emit_data (void)
102*38fd1498Szrj {
103*38fd1498Szrj   ira_allocno_t a;
104*38fd1498Szrj   ira_allocno_iterator ai;
105*38fd1498Szrj 
106*38fd1498Szrj   ira_allocno_emit_data
107*38fd1498Szrj     = (ira_emit_data_t) ira_allocate (ira_allocnos_num
108*38fd1498Szrj 				      * sizeof (struct ira_emit_data));
109*38fd1498Szrj   memset (ira_allocno_emit_data, 0,
110*38fd1498Szrj 	  ira_allocnos_num * sizeof (struct ira_emit_data));
111*38fd1498Szrj   FOR_EACH_ALLOCNO (a, ai)
112*38fd1498Szrj     ALLOCNO_ADD_DATA (a) = ira_allocno_emit_data + ALLOCNO_NUM (a);
113*38fd1498Szrj   new_allocno_emit_data_vec.create (50);
114*38fd1498Szrj 
115*38fd1498Szrj }
116*38fd1498Szrj 
117*38fd1498Szrj /* Free the emit data.  */
118*38fd1498Szrj void
ira_finish_emit_data(void)119*38fd1498Szrj ira_finish_emit_data (void)
120*38fd1498Szrj {
121*38fd1498Szrj   void_p p;
122*38fd1498Szrj   ira_allocno_t a;
123*38fd1498Szrj   ira_allocno_iterator ai;
124*38fd1498Szrj 
125*38fd1498Szrj   ira_free (ira_allocno_emit_data);
126*38fd1498Szrj   FOR_EACH_ALLOCNO (a, ai)
127*38fd1498Szrj     ALLOCNO_ADD_DATA (a) = NULL;
128*38fd1498Szrj   for (;new_allocno_emit_data_vec.length () != 0;)
129*38fd1498Szrj     {
130*38fd1498Szrj       p = new_allocno_emit_data_vec.pop ();
131*38fd1498Szrj       ira_free (p);
132*38fd1498Szrj     }
133*38fd1498Szrj   new_allocno_emit_data_vec.release ();
134*38fd1498Szrj }
135*38fd1498Szrj 
136*38fd1498Szrj /* Create and return a new allocno with given REGNO and
137*38fd1498Szrj    LOOP_TREE_NODE.  Allocate emit data for it.  */
138*38fd1498Szrj static ira_allocno_t
create_new_allocno(int regno,ira_loop_tree_node_t loop_tree_node)139*38fd1498Szrj create_new_allocno (int regno, ira_loop_tree_node_t loop_tree_node)
140*38fd1498Szrj {
141*38fd1498Szrj   ira_allocno_t a;
142*38fd1498Szrj 
143*38fd1498Szrj   a = ira_create_allocno (regno, false, loop_tree_node);
144*38fd1498Szrj   ALLOCNO_ADD_DATA (a) = ira_allocate (sizeof (struct ira_emit_data));
145*38fd1498Szrj   memset (ALLOCNO_ADD_DATA (a), 0, sizeof (struct ira_emit_data));
146*38fd1498Szrj   new_allocno_emit_data_vec.safe_push (ALLOCNO_ADD_DATA (a));
147*38fd1498Szrj   return a;
148*38fd1498Szrj }
149*38fd1498Szrj 
150*38fd1498Szrj 
151*38fd1498Szrj 
152*38fd1498Szrj /* See comments below.  */
153*38fd1498Szrj typedef struct move *move_t;
154*38fd1498Szrj 
155*38fd1498Szrj /* The structure represents an allocno move.  Both allocnos have the
156*38fd1498Szrj    same original regno but different allocation.  */
157*38fd1498Szrj struct move
158*38fd1498Szrj {
159*38fd1498Szrj   /* The allocnos involved in the move.  */
160*38fd1498Szrj   ira_allocno_t from, to;
161*38fd1498Szrj   /* The next move in the move sequence.  */
162*38fd1498Szrj   move_t next;
163*38fd1498Szrj   /* Used for finding dependencies.  */
164*38fd1498Szrj   bool visited_p;
165*38fd1498Szrj   /* The size of the following array. */
166*38fd1498Szrj   int deps_num;
167*38fd1498Szrj   /* Moves on which given move depends on.  Dependency can be cyclic.
168*38fd1498Szrj      It means we need a temporary to generates the moves.  Sequence
169*38fd1498Szrj      A1->A2, B1->B2 where A1 and B2 are assigned to reg R1 and A2 and
170*38fd1498Szrj      B1 are assigned to reg R2 is an example of the cyclic
171*38fd1498Szrj      dependencies.  */
172*38fd1498Szrj   move_t *deps;
173*38fd1498Szrj   /* First insn generated for the move.  */
174*38fd1498Szrj   rtx_insn *insn;
175*38fd1498Szrj };
176*38fd1498Szrj 
177*38fd1498Szrj /* Array of moves (indexed by BB index) which should be put at the
178*38fd1498Szrj    start/end of the corresponding basic blocks.  */
179*38fd1498Szrj static move_t *at_bb_start, *at_bb_end;
180*38fd1498Szrj 
181*38fd1498Szrj /* Max regno before renaming some pseudo-registers.  For example, the
182*38fd1498Szrj    same pseudo-register can be renamed in a loop if its allocation is
183*38fd1498Szrj    different outside the loop.  */
184*38fd1498Szrj static int max_regno_before_changing;
185*38fd1498Szrj 
186*38fd1498Szrj /* Return new move of allocnos TO and FROM.  */
187*38fd1498Szrj static move_t
create_move(ira_allocno_t to,ira_allocno_t from)188*38fd1498Szrj create_move (ira_allocno_t to, ira_allocno_t from)
189*38fd1498Szrj {
190*38fd1498Szrj   move_t move;
191*38fd1498Szrj 
192*38fd1498Szrj   move = (move_t) ira_allocate (sizeof (struct move));
193*38fd1498Szrj   move->deps = NULL;
194*38fd1498Szrj   move->deps_num = 0;
195*38fd1498Szrj   move->to = to;
196*38fd1498Szrj   move->from = from;
197*38fd1498Szrj   move->next = NULL;
198*38fd1498Szrj   move->insn = NULL;
199*38fd1498Szrj   move->visited_p = false;
200*38fd1498Szrj   return move;
201*38fd1498Szrj }
202*38fd1498Szrj 
203*38fd1498Szrj /* Free memory for MOVE and its dependencies.  */
204*38fd1498Szrj static void
free_move(move_t move)205*38fd1498Szrj free_move (move_t move)
206*38fd1498Szrj {
207*38fd1498Szrj   if (move->deps != NULL)
208*38fd1498Szrj     ira_free (move->deps);
209*38fd1498Szrj   ira_free (move);
210*38fd1498Szrj }
211*38fd1498Szrj 
212*38fd1498Szrj /* Free memory for list of the moves given by its HEAD.  */
213*38fd1498Szrj static void
free_move_list(move_t head)214*38fd1498Szrj free_move_list (move_t head)
215*38fd1498Szrj {
216*38fd1498Szrj   move_t next;
217*38fd1498Szrj 
218*38fd1498Szrj   for (; head != NULL; head = next)
219*38fd1498Szrj     {
220*38fd1498Szrj       next = head->next;
221*38fd1498Szrj       free_move (head);
222*38fd1498Szrj     }
223*38fd1498Szrj }
224*38fd1498Szrj 
225*38fd1498Szrj /* Return TRUE if the move list LIST1 and LIST2 are equal (two
226*38fd1498Szrj    moves are equal if they involve the same allocnos).  */
227*38fd1498Szrj static bool
eq_move_lists_p(move_t list1,move_t list2)228*38fd1498Szrj eq_move_lists_p (move_t list1, move_t list2)
229*38fd1498Szrj {
230*38fd1498Szrj   for (; list1 != NULL && list2 != NULL;
231*38fd1498Szrj        list1 = list1->next, list2 = list2->next)
232*38fd1498Szrj     if (list1->from != list2->from || list1->to != list2->to)
233*38fd1498Szrj       return false;
234*38fd1498Szrj   return list1 == list2;
235*38fd1498Szrj }
236*38fd1498Szrj 
237*38fd1498Szrj /* Print move list LIST into file F.  */
238*38fd1498Szrj static void
print_move_list(FILE * f,move_t list)239*38fd1498Szrj print_move_list (FILE *f, move_t list)
240*38fd1498Szrj {
241*38fd1498Szrj   for (; list != NULL; list = list->next)
242*38fd1498Szrj     fprintf (f, " a%dr%d->a%dr%d",
243*38fd1498Szrj 	     ALLOCNO_NUM (list->from), ALLOCNO_REGNO (list->from),
244*38fd1498Szrj 	     ALLOCNO_NUM (list->to), ALLOCNO_REGNO (list->to));
245*38fd1498Szrj   fprintf (f, "\n");
246*38fd1498Szrj }
247*38fd1498Szrj 
248*38fd1498Szrj extern void ira_debug_move_list (move_t list);
249*38fd1498Szrj 
250*38fd1498Szrj /* Print move list LIST into stderr.  */
251*38fd1498Szrj void
ira_debug_move_list(move_t list)252*38fd1498Szrj ira_debug_move_list (move_t list)
253*38fd1498Szrj {
254*38fd1498Szrj   print_move_list (stderr, list);
255*38fd1498Szrj }
256*38fd1498Szrj 
257*38fd1498Szrj /* This recursive function changes pseudo-registers in *LOC if it is
258*38fd1498Szrj    necessary.  The function returns TRUE if a change was done.  */
259*38fd1498Szrj static bool
change_regs(rtx * loc)260*38fd1498Szrj change_regs (rtx *loc)
261*38fd1498Szrj {
262*38fd1498Szrj   int i, regno, result = false;
263*38fd1498Szrj   const char *fmt;
264*38fd1498Szrj   enum rtx_code code;
265*38fd1498Szrj   rtx reg;
266*38fd1498Szrj 
267*38fd1498Szrj   if (*loc == NULL_RTX)
268*38fd1498Szrj     return false;
269*38fd1498Szrj   code = GET_CODE (*loc);
270*38fd1498Szrj   if (code == REG)
271*38fd1498Szrj     {
272*38fd1498Szrj       regno = REGNO (*loc);
273*38fd1498Szrj       if (regno < FIRST_PSEUDO_REGISTER)
274*38fd1498Szrj 	return false;
275*38fd1498Szrj       if (regno >= max_regno_before_changing)
276*38fd1498Szrj 	/* It is a shared register which was changed already.  */
277*38fd1498Szrj 	return false;
278*38fd1498Szrj       if (ira_curr_regno_allocno_map[regno] == NULL)
279*38fd1498Szrj 	return false;
280*38fd1498Szrj       reg = allocno_emit_reg (ira_curr_regno_allocno_map[regno]);
281*38fd1498Szrj       if (reg == *loc)
282*38fd1498Szrj 	return false;
283*38fd1498Szrj       *loc = reg;
284*38fd1498Szrj       return true;
285*38fd1498Szrj     }
286*38fd1498Szrj 
287*38fd1498Szrj   fmt = GET_RTX_FORMAT (code);
288*38fd1498Szrj   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
289*38fd1498Szrj     {
290*38fd1498Szrj       if (fmt[i] == 'e')
291*38fd1498Szrj 	result = change_regs (&XEXP (*loc, i)) || result;
292*38fd1498Szrj       else if (fmt[i] == 'E')
293*38fd1498Szrj 	{
294*38fd1498Szrj 	  int j;
295*38fd1498Szrj 
296*38fd1498Szrj 	  for (j = XVECLEN (*loc, i) - 1; j >= 0; j--)
297*38fd1498Szrj 	    result = change_regs (&XVECEXP (*loc, i, j)) || result;
298*38fd1498Szrj 	}
299*38fd1498Szrj     }
300*38fd1498Szrj   return result;
301*38fd1498Szrj }
302*38fd1498Szrj 
303*38fd1498Szrj static bool
change_regs_in_insn(rtx_insn ** insn_ptr)304*38fd1498Szrj change_regs_in_insn (rtx_insn **insn_ptr)
305*38fd1498Szrj {
306*38fd1498Szrj   rtx rtx = *insn_ptr;
307*38fd1498Szrj   bool result = change_regs (&rtx);
308*38fd1498Szrj   *insn_ptr = as_a <rtx_insn *> (rtx);
309*38fd1498Szrj   return result;
310*38fd1498Szrj }
311*38fd1498Szrj 
312*38fd1498Szrj /* Attach MOVE to the edge E.  The move is attached to the head of the
313*38fd1498Szrj    list if HEAD_P is TRUE.  */
314*38fd1498Szrj static void
add_to_edge_list(edge e,move_t move,bool head_p)315*38fd1498Szrj add_to_edge_list (edge e, move_t move, bool head_p)
316*38fd1498Szrj {
317*38fd1498Szrj   move_t last;
318*38fd1498Szrj 
319*38fd1498Szrj   if (head_p || e->aux == NULL)
320*38fd1498Szrj     {
321*38fd1498Szrj       move->next = (move_t) e->aux;
322*38fd1498Szrj       e->aux = move;
323*38fd1498Szrj     }
324*38fd1498Szrj   else
325*38fd1498Szrj     {
326*38fd1498Szrj       for (last = (move_t) e->aux; last->next != NULL; last = last->next)
327*38fd1498Szrj 	;
328*38fd1498Szrj       last->next = move;
329*38fd1498Szrj       move->next = NULL;
330*38fd1498Szrj     }
331*38fd1498Szrj }
332*38fd1498Szrj 
333*38fd1498Szrj /* Create and return new pseudo-register with the same attributes as
334*38fd1498Szrj    ORIGINAL_REG.  */
335*38fd1498Szrj rtx
ira_create_new_reg(rtx original_reg)336*38fd1498Szrj ira_create_new_reg (rtx original_reg)
337*38fd1498Szrj {
338*38fd1498Szrj   rtx new_reg;
339*38fd1498Szrj 
340*38fd1498Szrj   new_reg = gen_reg_rtx (GET_MODE (original_reg));
341*38fd1498Szrj   ORIGINAL_REGNO (new_reg) = ORIGINAL_REGNO (original_reg);
342*38fd1498Szrj   REG_USERVAR_P (new_reg) = REG_USERVAR_P (original_reg);
343*38fd1498Szrj   REG_POINTER (new_reg) = REG_POINTER (original_reg);
344*38fd1498Szrj   REG_ATTRS (new_reg) = REG_ATTRS (original_reg);
345*38fd1498Szrj   if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
346*38fd1498Szrj     fprintf (ira_dump_file, "      Creating newreg=%i from oldreg=%i\n",
347*38fd1498Szrj 	     REGNO (new_reg), REGNO (original_reg));
348*38fd1498Szrj   ira_expand_reg_equiv ();
349*38fd1498Szrj   return new_reg;
350*38fd1498Szrj }
351*38fd1498Szrj 
352*38fd1498Szrj /* Return TRUE if loop given by SUBNODE inside the loop given by
353*38fd1498Szrj    NODE.  */
354*38fd1498Szrj static bool
subloop_tree_node_p(ira_loop_tree_node_t subnode,ira_loop_tree_node_t node)355*38fd1498Szrj subloop_tree_node_p (ira_loop_tree_node_t subnode, ira_loop_tree_node_t node)
356*38fd1498Szrj {
357*38fd1498Szrj   for (; subnode != NULL; subnode = subnode->parent)
358*38fd1498Szrj     if (subnode == node)
359*38fd1498Szrj       return true;
360*38fd1498Szrj   return false;
361*38fd1498Szrj }
362*38fd1498Szrj 
363*38fd1498Szrj /* Set up member `reg' to REG for allocnos which has the same regno as
364*38fd1498Szrj    ALLOCNO and which are inside the loop corresponding to ALLOCNO. */
365*38fd1498Szrj static void
set_allocno_reg(ira_allocno_t allocno,rtx reg)366*38fd1498Szrj set_allocno_reg (ira_allocno_t allocno, rtx reg)
367*38fd1498Szrj {
368*38fd1498Szrj   int regno;
369*38fd1498Szrj   ira_allocno_t a;
370*38fd1498Szrj   ira_loop_tree_node_t node;
371*38fd1498Szrj 
372*38fd1498Szrj   node = ALLOCNO_LOOP_TREE_NODE (allocno);
373*38fd1498Szrj   for (a = ira_regno_allocno_map[ALLOCNO_REGNO (allocno)];
374*38fd1498Szrj        a != NULL;
375*38fd1498Szrj        a = ALLOCNO_NEXT_REGNO_ALLOCNO (a))
376*38fd1498Szrj     if (subloop_tree_node_p (ALLOCNO_LOOP_TREE_NODE (a), node))
377*38fd1498Szrj       ALLOCNO_EMIT_DATA (a)->reg = reg;
378*38fd1498Szrj   for (a = ALLOCNO_CAP (allocno); a != NULL; a = ALLOCNO_CAP (a))
379*38fd1498Szrj     ALLOCNO_EMIT_DATA (a)->reg = reg;
380*38fd1498Szrj   regno = ALLOCNO_REGNO (allocno);
381*38fd1498Szrj   for (a = allocno;;)
382*38fd1498Szrj     {
383*38fd1498Szrj       if (a == NULL || (a = ALLOCNO_CAP (a)) == NULL)
384*38fd1498Szrj 	{
385*38fd1498Szrj 	  node = node->parent;
386*38fd1498Szrj 	  if (node == NULL)
387*38fd1498Szrj 	    break;
388*38fd1498Szrj 	  a = node->regno_allocno_map[regno];
389*38fd1498Szrj 	}
390*38fd1498Szrj       if (a == NULL)
391*38fd1498Szrj 	continue;
392*38fd1498Szrj       if (ALLOCNO_EMIT_DATA (a)->child_renamed_p)
393*38fd1498Szrj 	break;
394*38fd1498Szrj       ALLOCNO_EMIT_DATA (a)->child_renamed_p = true;
395*38fd1498Szrj     }
396*38fd1498Szrj }
397*38fd1498Szrj 
398*38fd1498Szrj /* Return true if there is an entry to given loop not from its parent
399*38fd1498Szrj    (or grandparent) block.  For example, it is possible for two
400*38fd1498Szrj    adjacent loops inside another loop.  */
401*38fd1498Szrj static bool
entered_from_non_parent_p(ira_loop_tree_node_t loop_node)402*38fd1498Szrj entered_from_non_parent_p (ira_loop_tree_node_t loop_node)
403*38fd1498Szrj {
404*38fd1498Szrj   ira_loop_tree_node_t bb_node, src_loop_node, parent;
405*38fd1498Szrj   edge e;
406*38fd1498Szrj   edge_iterator ei;
407*38fd1498Szrj 
408*38fd1498Szrj   for (bb_node = loop_node->children;
409*38fd1498Szrj        bb_node != NULL;
410*38fd1498Szrj        bb_node = bb_node->next)
411*38fd1498Szrj     if (bb_node->bb != NULL)
412*38fd1498Szrj       {
413*38fd1498Szrj 	FOR_EACH_EDGE (e, ei, bb_node->bb->preds)
414*38fd1498Szrj 	  if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
415*38fd1498Szrj 	      && (src_loop_node = IRA_BB_NODE (e->src)->parent) != loop_node)
416*38fd1498Szrj 	    {
417*38fd1498Szrj 	      for (parent = src_loop_node->parent;
418*38fd1498Szrj 		   parent != NULL;
419*38fd1498Szrj 		   parent = parent->parent)
420*38fd1498Szrj 		if (parent == loop_node)
421*38fd1498Szrj 		  break;
422*38fd1498Szrj 	      if (parent != NULL)
423*38fd1498Szrj 		/* That is an exit from a nested loop -- skip it.  */
424*38fd1498Szrj 		continue;
425*38fd1498Szrj 	      for (parent = loop_node->parent;
426*38fd1498Szrj 		   parent != NULL;
427*38fd1498Szrj 		   parent = parent->parent)
428*38fd1498Szrj 		if (src_loop_node == parent)
429*38fd1498Szrj 		  break;
430*38fd1498Szrj 	      if (parent == NULL)
431*38fd1498Szrj 		return true;
432*38fd1498Szrj 	    }
433*38fd1498Szrj       }
434*38fd1498Szrj   return false;
435*38fd1498Szrj }
436*38fd1498Szrj 
437*38fd1498Szrj /* Set up ENTERED_FROM_NON_PARENT_P for each loop region.  */
438*38fd1498Szrj static void
setup_entered_from_non_parent_p(void)439*38fd1498Szrj setup_entered_from_non_parent_p (void)
440*38fd1498Szrj {
441*38fd1498Szrj   unsigned int i;
442*38fd1498Szrj   loop_p loop;
443*38fd1498Szrj 
444*38fd1498Szrj   ira_assert (current_loops != NULL);
445*38fd1498Szrj   FOR_EACH_VEC_SAFE_ELT (get_loops (cfun), i, loop)
446*38fd1498Szrj     if (ira_loop_nodes[i].regno_allocno_map != NULL)
447*38fd1498Szrj       ira_loop_nodes[i].entered_from_non_parent_p
448*38fd1498Szrj 	= entered_from_non_parent_p (&ira_loop_nodes[i]);
449*38fd1498Szrj }
450*38fd1498Szrj 
451*38fd1498Szrj /* Return TRUE if move of SRC_ALLOCNO (assigned to hard register) to
452*38fd1498Szrj    DEST_ALLOCNO (assigned to memory) can be removed because it does
453*38fd1498Szrj    not change value of the destination.  One possible reason for this
454*38fd1498Szrj    is the situation when SRC_ALLOCNO is not modified in the
455*38fd1498Szrj    corresponding loop.  */
456*38fd1498Szrj static bool
store_can_be_removed_p(ira_allocno_t src_allocno,ira_allocno_t dest_allocno)457*38fd1498Szrj store_can_be_removed_p (ira_allocno_t src_allocno, ira_allocno_t dest_allocno)
458*38fd1498Szrj {
459*38fd1498Szrj   int regno, orig_regno;
460*38fd1498Szrj   ira_allocno_t a;
461*38fd1498Szrj   ira_loop_tree_node_t node;
462*38fd1498Szrj 
463*38fd1498Szrj   ira_assert (ALLOCNO_CAP_MEMBER (src_allocno) == NULL
464*38fd1498Szrj 	      && ALLOCNO_CAP_MEMBER (dest_allocno) == NULL);
465*38fd1498Szrj   orig_regno = ALLOCNO_REGNO (src_allocno);
466*38fd1498Szrj   regno = REGNO (allocno_emit_reg (dest_allocno));
467*38fd1498Szrj   for (node = ALLOCNO_LOOP_TREE_NODE (src_allocno);
468*38fd1498Szrj        node != NULL;
469*38fd1498Szrj        node = node->parent)
470*38fd1498Szrj     {
471*38fd1498Szrj       a = node->regno_allocno_map[orig_regno];
472*38fd1498Szrj       ira_assert (a != NULL);
473*38fd1498Szrj       if (REGNO (allocno_emit_reg (a)) == (unsigned) regno)
474*38fd1498Szrj 	/* We achieved the destination and everything is ok.  */
475*38fd1498Szrj 	return true;
476*38fd1498Szrj       else if (bitmap_bit_p (node->modified_regnos, orig_regno))
477*38fd1498Szrj 	return false;
478*38fd1498Szrj       else if (node->entered_from_non_parent_p)
479*38fd1498Szrj 	/* If there is a path from a destination loop block to the
480*38fd1498Szrj 	   source loop header containing basic blocks of non-parents
481*38fd1498Szrj 	   (grandparents) of the source loop, we should have checked
482*38fd1498Szrj 	   modifications of the pseudo on this path too to decide
483*38fd1498Szrj 	   about possibility to remove the store.  It could be done by
484*38fd1498Szrj 	   solving a data-flow problem.  Unfortunately such global
485*38fd1498Szrj 	   solution would complicate IR flattening.  Therefore we just
486*38fd1498Szrj 	   prohibit removal of the store in such complicated case.  */
487*38fd1498Szrj 	return false;
488*38fd1498Szrj     }
489*38fd1498Szrj   /* It is actually a loop entry -- do not remove the store.  */
490*38fd1498Szrj   return false;
491*38fd1498Szrj }
492*38fd1498Szrj 
493*38fd1498Szrj /* Generate and attach moves to the edge E.  This looks at the final
494*38fd1498Szrj    regnos of allocnos living on the edge with the same original regno
495*38fd1498Szrj    to figure out when moves should be generated.  */
496*38fd1498Szrj static void
generate_edge_moves(edge e)497*38fd1498Szrj generate_edge_moves (edge e)
498*38fd1498Szrj {
499*38fd1498Szrj   ira_loop_tree_node_t src_loop_node, dest_loop_node;
500*38fd1498Szrj   unsigned int regno;
501*38fd1498Szrj   bitmap_iterator bi;
502*38fd1498Szrj   ira_allocno_t src_allocno, dest_allocno, *src_map, *dest_map;
503*38fd1498Szrj   move_t move;
504*38fd1498Szrj   bitmap regs_live_in_dest, regs_live_out_src;
505*38fd1498Szrj 
506*38fd1498Szrj   src_loop_node = IRA_BB_NODE (e->src)->parent;
507*38fd1498Szrj   dest_loop_node = IRA_BB_NODE (e->dest)->parent;
508*38fd1498Szrj   e->aux = NULL;
509*38fd1498Szrj   if (src_loop_node == dest_loop_node)
510*38fd1498Szrj     return;
511*38fd1498Szrj   src_map = src_loop_node->regno_allocno_map;
512*38fd1498Szrj   dest_map = dest_loop_node->regno_allocno_map;
513*38fd1498Szrj   regs_live_in_dest = df_get_live_in (e->dest);
514*38fd1498Szrj   regs_live_out_src = df_get_live_out (e->src);
515*38fd1498Szrj   EXECUTE_IF_SET_IN_REG_SET (regs_live_in_dest,
516*38fd1498Szrj 			     FIRST_PSEUDO_REGISTER, regno, bi)
517*38fd1498Szrj     if (bitmap_bit_p (regs_live_out_src, regno))
518*38fd1498Szrj       {
519*38fd1498Szrj 	src_allocno = src_map[regno];
520*38fd1498Szrj 	dest_allocno = dest_map[regno];
521*38fd1498Szrj 	if (REGNO (allocno_emit_reg (src_allocno))
522*38fd1498Szrj 	    == REGNO (allocno_emit_reg (dest_allocno)))
523*38fd1498Szrj 	  continue;
524*38fd1498Szrj 	/* Remove unnecessary stores at the region exit.  We should do
525*38fd1498Szrj 	   this for readonly memory for sure and this is guaranteed by
526*38fd1498Szrj 	   that we never generate moves on region borders (see
527*38fd1498Szrj 	   checking in function change_loop).  */
528*38fd1498Szrj  	if (ALLOCNO_HARD_REGNO (dest_allocno) < 0
529*38fd1498Szrj 	    && ALLOCNO_HARD_REGNO (src_allocno) >= 0
530*38fd1498Szrj 	    && store_can_be_removed_p (src_allocno, dest_allocno))
531*38fd1498Szrj 	  {
532*38fd1498Szrj 	    ALLOCNO_EMIT_DATA (src_allocno)->mem_optimized_dest = dest_allocno;
533*38fd1498Szrj 	    ALLOCNO_EMIT_DATA (dest_allocno)->mem_optimized_dest_p = true;
534*38fd1498Szrj 	    if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
535*38fd1498Szrj 	      fprintf (ira_dump_file, "      Remove r%d:a%d->a%d(mem)\n",
536*38fd1498Szrj 		       regno, ALLOCNO_NUM (src_allocno),
537*38fd1498Szrj 		       ALLOCNO_NUM (dest_allocno));
538*38fd1498Szrj 	    continue;
539*38fd1498Szrj 	  }
540*38fd1498Szrj 	move = create_move (dest_allocno, src_allocno);
541*38fd1498Szrj 	add_to_edge_list (e, move, true);
542*38fd1498Szrj     }
543*38fd1498Szrj }
544*38fd1498Szrj 
545*38fd1498Szrj /* Bitmap of allocnos local for the current loop.  */
546*38fd1498Szrj static bitmap local_allocno_bitmap;
547*38fd1498Szrj 
548*38fd1498Szrj /* This bitmap is used to find that we need to generate and to use a
549*38fd1498Szrj    new pseudo-register when processing allocnos with the same original
550*38fd1498Szrj    regno.  */
551*38fd1498Szrj static bitmap used_regno_bitmap;
552*38fd1498Szrj 
553*38fd1498Szrj /* This bitmap contains regnos of allocnos which were renamed locally
554*38fd1498Szrj    because the allocnos correspond to disjoint live ranges in loops
555*38fd1498Szrj    with a common parent.  */
556*38fd1498Szrj static bitmap renamed_regno_bitmap;
557*38fd1498Szrj 
558*38fd1498Szrj /* Change (if necessary) pseudo-registers inside loop given by loop
559*38fd1498Szrj    tree node NODE.  */
560*38fd1498Szrj static void
change_loop(ira_loop_tree_node_t node)561*38fd1498Szrj change_loop (ira_loop_tree_node_t node)
562*38fd1498Szrj {
563*38fd1498Szrj   bitmap_iterator bi;
564*38fd1498Szrj   unsigned int i;
565*38fd1498Szrj   int regno;
566*38fd1498Szrj   bool used_p;
567*38fd1498Szrj   ira_allocno_t allocno, parent_allocno, *map;
568*38fd1498Szrj   rtx_insn *insn;
569*38fd1498Szrj   rtx original_reg;
570*38fd1498Szrj   enum reg_class aclass, pclass;
571*38fd1498Szrj   ira_loop_tree_node_t parent;
572*38fd1498Szrj 
573*38fd1498Szrj   if (node != ira_loop_tree_root)
574*38fd1498Szrj     {
575*38fd1498Szrj       ira_assert (current_loops != NULL);
576*38fd1498Szrj 
577*38fd1498Szrj       if (node->bb != NULL)
578*38fd1498Szrj 	{
579*38fd1498Szrj 	  FOR_BB_INSNS (node->bb, insn)
580*38fd1498Szrj 	    if (INSN_P (insn) && change_regs_in_insn (&insn))
581*38fd1498Szrj 	      {
582*38fd1498Szrj 		df_insn_rescan (insn);
583*38fd1498Szrj 		df_notes_rescan (insn);
584*38fd1498Szrj 	      }
585*38fd1498Szrj 	  return;
586*38fd1498Szrj 	}
587*38fd1498Szrj 
588*38fd1498Szrj       if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
589*38fd1498Szrj 	fprintf (ira_dump_file,
590*38fd1498Szrj 		 "      Changing RTL for loop %d (header bb%d)\n",
591*38fd1498Szrj 		 node->loop_num, node->loop->header->index);
592*38fd1498Szrj 
593*38fd1498Szrj       parent = ira_curr_loop_tree_node->parent;
594*38fd1498Szrj       map = parent->regno_allocno_map;
595*38fd1498Szrj       EXECUTE_IF_SET_IN_REG_SET (ira_curr_loop_tree_node->border_allocnos,
596*38fd1498Szrj 				 0, i, bi)
597*38fd1498Szrj 	{
598*38fd1498Szrj 	  allocno = ira_allocnos[i];
599*38fd1498Szrj 	  regno = ALLOCNO_REGNO (allocno);
600*38fd1498Szrj 	  aclass = ALLOCNO_CLASS (allocno);
601*38fd1498Szrj 	  pclass = ira_pressure_class_translate[aclass];
602*38fd1498Szrj 	  parent_allocno = map[regno];
603*38fd1498Szrj 	  ira_assert (regno < ira_reg_equiv_len);
604*38fd1498Szrj 	  /* We generate the same hard register move because the
605*38fd1498Szrj 	     reload pass can put an allocno into memory in this case
606*38fd1498Szrj 	     we will have live range splitting.  If it does not happen
607*38fd1498Szrj 	     such the same hard register moves will be removed.  The
608*38fd1498Szrj 	     worst case when the both allocnos are put into memory by
609*38fd1498Szrj 	     the reload is very rare.  */
610*38fd1498Szrj 	  if (parent_allocno != NULL
611*38fd1498Szrj 	      && (ALLOCNO_HARD_REGNO (allocno)
612*38fd1498Szrj 		  == ALLOCNO_HARD_REGNO (parent_allocno))
613*38fd1498Szrj 	      && (ALLOCNO_HARD_REGNO (allocno) < 0
614*38fd1498Szrj 		  || (parent->reg_pressure[pclass] + 1
615*38fd1498Szrj 		      <= ira_class_hard_regs_num[pclass])
616*38fd1498Szrj 		  || TEST_HARD_REG_BIT (ira_prohibited_mode_move_regs
617*38fd1498Szrj 					[ALLOCNO_MODE (allocno)],
618*38fd1498Szrj 					ALLOCNO_HARD_REGNO (allocno))
619*38fd1498Szrj 		  /* don't create copies because reload can spill an
620*38fd1498Szrj 		     allocno set by copy although the allocno will not
621*38fd1498Szrj 		     get memory slot.  */
622*38fd1498Szrj 		  || ira_equiv_no_lvalue_p (regno)
623*38fd1498Szrj 		  || (pic_offset_table_rtx != NULL
624*38fd1498Szrj 		      && (ALLOCNO_REGNO (allocno)
625*38fd1498Szrj 			  == (int) REGNO (pic_offset_table_rtx)))))
626*38fd1498Szrj 	    continue;
627*38fd1498Szrj 	  original_reg = allocno_emit_reg (allocno);
628*38fd1498Szrj 	  if (parent_allocno == NULL
629*38fd1498Szrj 	      || (REGNO (allocno_emit_reg (parent_allocno))
630*38fd1498Szrj 		  == REGNO (original_reg)))
631*38fd1498Szrj 	    {
632*38fd1498Szrj 	      if (internal_flag_ira_verbose > 3 && ira_dump_file)
633*38fd1498Szrj 		fprintf (ira_dump_file, "  %i vs parent %i:",
634*38fd1498Szrj 			 ALLOCNO_HARD_REGNO (allocno),
635*38fd1498Szrj 			 ALLOCNO_HARD_REGNO (parent_allocno));
636*38fd1498Szrj 	      set_allocno_reg (allocno, ira_create_new_reg (original_reg));
637*38fd1498Szrj 	    }
638*38fd1498Szrj 	}
639*38fd1498Szrj     }
640*38fd1498Szrj   /* Rename locals: Local allocnos with same regno in different loops
641*38fd1498Szrj      might get the different hard register.  So we need to change
642*38fd1498Szrj      ALLOCNO_REG.  */
643*38fd1498Szrj   bitmap_and_compl (local_allocno_bitmap,
644*38fd1498Szrj 		    ira_curr_loop_tree_node->all_allocnos,
645*38fd1498Szrj 		    ira_curr_loop_tree_node->border_allocnos);
646*38fd1498Szrj   EXECUTE_IF_SET_IN_REG_SET (local_allocno_bitmap, 0, i, bi)
647*38fd1498Szrj     {
648*38fd1498Szrj       allocno = ira_allocnos[i];
649*38fd1498Szrj       regno = ALLOCNO_REGNO (allocno);
650*38fd1498Szrj       if (ALLOCNO_CAP_MEMBER (allocno) != NULL)
651*38fd1498Szrj 	continue;
652*38fd1498Szrj       used_p = !bitmap_set_bit (used_regno_bitmap, regno);
653*38fd1498Szrj       ALLOCNO_EMIT_DATA (allocno)->somewhere_renamed_p = true;
654*38fd1498Szrj       if (! used_p)
655*38fd1498Szrj 	continue;
656*38fd1498Szrj       bitmap_set_bit (renamed_regno_bitmap, regno);
657*38fd1498Szrj       set_allocno_reg (allocno, ira_create_new_reg (allocno_emit_reg (allocno)));
658*38fd1498Szrj     }
659*38fd1498Szrj }
660*38fd1498Szrj 
661*38fd1498Szrj /* Process to set up flag somewhere_renamed_p.  */
662*38fd1498Szrj static void
set_allocno_somewhere_renamed_p(void)663*38fd1498Szrj set_allocno_somewhere_renamed_p (void)
664*38fd1498Szrj {
665*38fd1498Szrj   unsigned int regno;
666*38fd1498Szrj   ira_allocno_t allocno;
667*38fd1498Szrj   ira_allocno_iterator ai;
668*38fd1498Szrj 
669*38fd1498Szrj   FOR_EACH_ALLOCNO (allocno, ai)
670*38fd1498Szrj     {
671*38fd1498Szrj       regno = ALLOCNO_REGNO (allocno);
672*38fd1498Szrj       if (bitmap_bit_p (renamed_regno_bitmap, regno)
673*38fd1498Szrj 	  && REGNO (allocno_emit_reg (allocno)) == regno)
674*38fd1498Szrj 	ALLOCNO_EMIT_DATA (allocno)->somewhere_renamed_p = true;
675*38fd1498Szrj     }
676*38fd1498Szrj }
677*38fd1498Szrj 
678*38fd1498Szrj /* Return TRUE if move lists on all edges given in vector VEC are
679*38fd1498Szrj    equal.  */
680*38fd1498Szrj static bool
eq_edge_move_lists_p(vec<edge,va_gc> * vec)681*38fd1498Szrj eq_edge_move_lists_p (vec<edge, va_gc> *vec)
682*38fd1498Szrj {
683*38fd1498Szrj   move_t list;
684*38fd1498Szrj   int i;
685*38fd1498Szrj 
686*38fd1498Szrj   list = (move_t) EDGE_I (vec, 0)->aux;
687*38fd1498Szrj   for (i = EDGE_COUNT (vec) - 1; i > 0; i--)
688*38fd1498Szrj     if (! eq_move_lists_p (list, (move_t) EDGE_I (vec, i)->aux))
689*38fd1498Szrj       return false;
690*38fd1498Szrj   return true;
691*38fd1498Szrj }
692*38fd1498Szrj 
693*38fd1498Szrj /* Look at all entry edges (if START_P) or exit edges of basic block
694*38fd1498Szrj    BB and put move lists at the BB start or end if it is possible.  In
695*38fd1498Szrj    other words, this decreases code duplication of allocno moves.  */
696*38fd1498Szrj static void
unify_moves(basic_block bb,bool start_p)697*38fd1498Szrj unify_moves (basic_block bb, bool start_p)
698*38fd1498Szrj {
699*38fd1498Szrj   int i;
700*38fd1498Szrj   edge e;
701*38fd1498Szrj   move_t list;
702*38fd1498Szrj   vec<edge, va_gc> *vec;
703*38fd1498Szrj 
704*38fd1498Szrj   vec = (start_p ? bb->preds : bb->succs);
705*38fd1498Szrj   if (EDGE_COUNT (vec) == 0 || ! eq_edge_move_lists_p (vec))
706*38fd1498Szrj     return;
707*38fd1498Szrj   e = EDGE_I (vec, 0);
708*38fd1498Szrj   list = (move_t) e->aux;
709*38fd1498Szrj   if (! start_p && control_flow_insn_p (BB_END (bb)))
710*38fd1498Szrj     return;
711*38fd1498Szrj   e->aux = NULL;
712*38fd1498Szrj   for (i = EDGE_COUNT (vec) - 1; i > 0; i--)
713*38fd1498Szrj     {
714*38fd1498Szrj       e = EDGE_I (vec, i);
715*38fd1498Szrj       free_move_list ((move_t) e->aux);
716*38fd1498Szrj       e->aux = NULL;
717*38fd1498Szrj     }
718*38fd1498Szrj   if (start_p)
719*38fd1498Szrj     at_bb_start[bb->index] = list;
720*38fd1498Szrj   else
721*38fd1498Szrj     at_bb_end[bb->index] = list;
722*38fd1498Szrj }
723*38fd1498Szrj 
724*38fd1498Szrj /* Last move (in move sequence being processed) setting up the
725*38fd1498Szrj    corresponding hard register.  */
726*38fd1498Szrj static move_t hard_regno_last_set[FIRST_PSEUDO_REGISTER];
727*38fd1498Szrj 
728*38fd1498Szrj /* If the element value is equal to CURR_TICK then the corresponding
729*38fd1498Szrj    element in `hard_regno_last_set' is defined and correct.  */
730*38fd1498Szrj static int hard_regno_last_set_check[FIRST_PSEUDO_REGISTER];
731*38fd1498Szrj 
732*38fd1498Szrj /* Last move (in move sequence being processed) setting up the
733*38fd1498Szrj    corresponding allocno.  */
734*38fd1498Szrj static move_t *allocno_last_set;
735*38fd1498Szrj 
736*38fd1498Szrj /* If the element value is equal to CURR_TICK then the corresponding
   element in `allocno_last_set' is defined and correct.  */
738*38fd1498Szrj static int *allocno_last_set_check;
739*38fd1498Szrj 
740*38fd1498Szrj /* Definition of vector of moves.  */
741*38fd1498Szrj 
742*38fd1498Szrj /* This vec contains moves sorted topologically (depth-first) on their
743*38fd1498Szrj    dependency graph.  */
744*38fd1498Szrj static vec<move_t> move_vec;
745*38fd1498Szrj 
746*38fd1498Szrj /* The variable value is used to check correctness of values of
   elements of arrays `hard_regno_last_set_check' and
748*38fd1498Szrj    `allocno_last_set_check'.  */
749*38fd1498Szrj static int curr_tick;
750*38fd1498Szrj 
751*38fd1498Szrj /* This recursive function traverses dependencies of MOVE and produces
752*38fd1498Szrj    topological sorting (in depth-first order).  */
753*38fd1498Szrj static void
traverse_moves(move_t move)754*38fd1498Szrj traverse_moves (move_t move)
755*38fd1498Szrj {
756*38fd1498Szrj   int i;
757*38fd1498Szrj 
758*38fd1498Szrj   if (move->visited_p)
759*38fd1498Szrj     return;
760*38fd1498Szrj   move->visited_p = true;
761*38fd1498Szrj   for (i = move->deps_num - 1; i >= 0; i--)
762*38fd1498Szrj     traverse_moves (move->deps[i]);
763*38fd1498Szrj   move_vec.safe_push (move);
764*38fd1498Szrj }
765*38fd1498Szrj 
/* Remove unnecessary moves in the LIST, makes topological sorting,
   and removes cycles on hard reg dependencies by introducing new
   allocnos assigned to memory and additional moves.  It returns the
   result move list.  */
static move_t
modify_move_list (move_t list)
{
  int i, n, nregs, hard_regno;
  ira_allocno_t to, from;
  move_t move, new_move, set_move, first, last;

  if (list == NULL)
    return NULL;
  /* Create move deps.  */
  curr_tick++;
  /* Pass 1: remember, for every hard register, which move in LIST
     writes it.  CURR_TICK marks the entries of hard_regno_last_set
     that are valid for this particular list.  */
  for (move = list; move != NULL; move = move->next)
    {
      to = move->to;
      if ((hard_regno = ALLOCNO_HARD_REGNO (to)) < 0)
	continue;
      nregs = hard_regno_nregs (hard_regno, ALLOCNO_MODE (to));
      for (i = 0; i < nregs; i++)
	{
	  hard_regno_last_set[hard_regno + i] = move;
	  hard_regno_last_set_check[hard_regno + i] = curr_tick;
	}
    }
  /* Pass 2: a move reading a hard register that another move in LIST
     overwrites must execute before that other move.  Record such
     writers in MOVE->deps (the dependents are emitted after MOVE by
     the topological sort below).  */
  for (move = list; move != NULL; move = move->next)
    {
      from = move->from;
      to = move->to;
      if ((hard_regno = ALLOCNO_HARD_REGNO (from)) >= 0)
	{
	  nregs = hard_regno_nregs (hard_regno, ALLOCNO_MODE (from));
	  /* First count matching writers to size the allocation;
	     writers of the same regno are not dependencies.  */
	  for (n = i = 0; i < nregs; i++)
	    if (hard_regno_last_set_check[hard_regno + i] == curr_tick
		&& (ALLOCNO_REGNO (hard_regno_last_set[hard_regno + i]->to)
		    != ALLOCNO_REGNO (from)))
	      n++;
	  move->deps = (move_t *) ira_allocate (n * sizeof (move_t));
	  for (n = i = 0; i < nregs; i++)
	    if (hard_regno_last_set_check[hard_regno + i] == curr_tick
		&& (ALLOCNO_REGNO (hard_regno_last_set[hard_regno + i]->to)
		    != ALLOCNO_REGNO (from)))
	      move->deps[n++] = hard_regno_last_set[hard_regno + i];
	  move->deps_num = n;
	}
    }
  /* Topological sorting:  */
  move_vec.truncate (0);
  for (move = list; move != NULL; move = move->next)
    traverse_moves (move);
  /* MOVE_VEC holds the moves in reverse emission order; rebuild the
     linked list in forward order.  */
  last = NULL;
  for (i = (int) move_vec.length () - 1; i >= 0; i--)
    {
      move = move_vec[i];
      move->next = NULL;
      if (last != NULL)
	last->next = move;
      last = move;
    }
  /* The last pushed element is the head of the rebuilt list.  */
  first = move_vec.last ();
  /* Removing cycles:  */
  curr_tick++;
  move_vec.truncate (0);
  /* Walk the sorted list tracking hard register writers again.  If a
     move reads a hard register already overwritten by an earlier move
     in the list, the dependency could not be honored by ordering
     alone (a cycle); break it with a fresh memory-assigned allocno
     and an extra move.  */
  for (move = first; move != NULL; move = move->next)
    {
      from = move->from;
      to = move->to;
      if ((hard_regno = ALLOCNO_HARD_REGNO (from)) >= 0)
	{
	  nregs = hard_regno_nregs (hard_regno, ALLOCNO_MODE (from));
	  for (i = 0; i < nregs; i++)
	    if (hard_regno_last_set_check[hard_regno + i] == curr_tick
		&& ALLOCNO_HARD_REGNO
		   (hard_regno_last_set[hard_regno + i]->to) >= 0)
	      {
		int n, j;
		ira_allocno_t new_allocno;

		set_move = hard_regno_last_set[hard_regno + i];
		/* It does not matter what loop_tree_node (of TO or
		   FROM) to use for the new allocno because of
		   subsequent IRA internal representation
		   flattening.  */
		new_allocno
		  = create_new_allocno (ALLOCNO_REGNO (set_move->to),
					ALLOCNO_LOOP_TREE_NODE (set_move->to));
		ALLOCNO_MODE (new_allocno) = ALLOCNO_MODE (set_move->to);
		ira_set_allocno_class (new_allocno,
				       ALLOCNO_CLASS (set_move->to));
		ira_create_allocno_objects (new_allocno);
		ALLOCNO_ASSIGNED_P (new_allocno) = true;
		/* Hard regno -1: the temporary lives in memory.  */
		ALLOCNO_HARD_REGNO (new_allocno) = -1;
		ALLOCNO_EMIT_DATA (new_allocno)->reg
		  = ira_create_new_reg (allocno_emit_reg (set_move->to));

		/* Make it possibly conflicting with all earlier
		   created allocnos.  Cases where temporary allocnos
		   created to remove the cycles are quite rare.  */
		n = ALLOCNO_NUM_OBJECTS (new_allocno);
		gcc_assert (n == ALLOCNO_NUM_OBJECTS (set_move->to));
		for (j = 0; j < n; j++)
		  {
		    ira_object_t new_obj = ALLOCNO_OBJECT (new_allocno, j);

		    OBJECT_MIN (new_obj) = 0;
		    OBJECT_MAX (new_obj) = ira_objects_num - 1;
		  }

		/* Retarget SET_MOVE at the temporary and create an
		   extra move between the temporary and the original
		   destination; the extra move is appended at the end
		   of the list below.  */
		new_move = create_move (set_move->to, new_allocno);
		set_move->to = new_allocno;
		move_vec.safe_push (new_move);
		ira_move_loops_num++;
		if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL)
		  fprintf (ira_dump_file,
			   "    Creating temporary allocno a%dr%d\n",
			   ALLOCNO_NUM (new_allocno),
			   REGNO (allocno_emit_reg (new_allocno)));
	      }
	}
      if ((hard_regno = ALLOCNO_HARD_REGNO (to)) < 0)
	continue;
      nregs = hard_regno_nregs (hard_regno, ALLOCNO_MODE (to));
      for (i = 0; i < nregs; i++)
	{
	  hard_regno_last_set[hard_regno + i] = move;
	  hard_regno_last_set_check[hard_regno + i] = curr_tick;
	}
    }
  /* Append the cycle-breaking moves collected above to the end of the
     result list.  */
  for (i = (int) move_vec.length () - 1; i >= 0; i--)
    {
      move = move_vec[i];
      move->next = NULL;
      last->next = move;
      last = move;
    }
  return first;
}
905*38fd1498Szrj 
/* Generate RTX move insns from the move list LIST.  This updates
   allocation cost using move execution frequency FREQ.  */
static rtx_insn *
emit_move_list (move_t list, int freq)
{
  rtx to, from, dest;
  int to_regno, from_regno, cost, regno;
  rtx_insn *result, *insn;
  rtx set;
  machine_mode mode;
  enum reg_class aclass;

  grow_reg_equivs ();
  start_sequence ();
  for (; list != NULL; list = list->next)
    {
      /* Emit each move into a nested sequence so its insns can be
	 saved in LIST->insn before being re-emitted into the outer
	 sequence below.  */
      start_sequence ();
      to = allocno_emit_reg (list->to);
      to_regno = REGNO (to);
      from = allocno_emit_reg (list->from);
      from_regno = REGNO (from);
      emit_move_insn (to, from);
      list->insn = get_insns ();
      end_sequence ();
      for (insn = list->insn; insn != NULL_RTX; insn = NEXT_INSN (insn))
	{
	  /* The reload needs to have set up insn codes.  If the
	     reload sets up insn codes by itself, it may fail because
	     insns will have hard registers instead of pseudos and
	     there may be no machine insn with given hard
	     registers.  */
	  recog_memoized (insn);
	  /* Add insn to equiv init insn list if it is necessary.
	     Otherwise reload will not remove this insn if it decides
	     to use the equivalence.  */
	  if ((set = single_set (insn)) != NULL_RTX)
	    {
	      dest = SET_DEST (set);
	      if (GET_CODE (dest) == SUBREG)
		dest = SUBREG_REG (dest);
	      ira_assert (REG_P (dest));
	      regno = REGNO (dest);
	      if (regno >= ira_reg_equiv_len
		  || (ira_reg_equiv[regno].invariant == NULL_RTX
		      && ira_reg_equiv[regno].constant == NULL_RTX))
		continue; /* regno has no equivalence.  */
	      ira_assert ((int) reg_equivs->length () > regno);
	      reg_equiv_init (regno)
		= gen_rtx_INSN_LIST (VOIDmode, insn, reg_equiv_init (regno));
	    }
	}
      if (ira_use_lra_p)
	ira_update_equiv_info_by_shuffle_insn (to_regno, from_regno, list->insn);
      emit_insn (list->insn);
      /* Account the move's cost according to where the source and
	 destination allocnos were assigned.  */
      mode = ALLOCNO_MODE (list->to);
      aclass = ALLOCNO_CLASS (list->to);
      cost = 0;
      if (ALLOCNO_HARD_REGNO (list->to) < 0)
	{
	  /* Destination is in memory: register -> memory store.  */
	  if (ALLOCNO_HARD_REGNO (list->from) >= 0)
	    {
	      cost = ira_memory_move_cost[mode][aclass][0] * freq;
	      ira_store_cost += cost;
	    }
	}
      else if (ALLOCNO_HARD_REGNO (list->from) < 0)
	{
	  /* Source is in memory: memory -> register load.  (The
	     inner test is always true here since the outer else
	     established ALLOCNO_HARD_REGNO (list->to) >= 0.)  */
	  if (ALLOCNO_HARD_REGNO (list->to) >= 0)
	    {
	      cost = ira_memory_move_cost[mode][aclass][0] * freq;
	      ira_load_cost += cost;
	    }
	}
      else
	{
	  /* Both sides got hard registers: register shuffle.  */
	  ira_init_register_move_cost_if_necessary (mode);
	  cost = ira_register_move_cost[mode][aclass][aclass] * freq;
	  ira_shuffle_cost += cost;
	}
      ira_overall_cost += cost;
    }
  result = get_insns ();
  end_sequence ();
  return result;
}
991*38fd1498Szrj 
992*38fd1498Szrj /* Generate RTX move insns from move lists attached to basic blocks
993*38fd1498Szrj    and edges.  */
994*38fd1498Szrj static void
emit_moves(void)995*38fd1498Szrj emit_moves (void)
996*38fd1498Szrj {
997*38fd1498Szrj   basic_block bb;
998*38fd1498Szrj   edge_iterator ei;
999*38fd1498Szrj   edge e;
1000*38fd1498Szrj   rtx_insn *insns, *tmp;
1001*38fd1498Szrj 
1002*38fd1498Szrj   FOR_EACH_BB_FN (bb, cfun)
1003*38fd1498Szrj     {
1004*38fd1498Szrj       if (at_bb_start[bb->index] != NULL)
1005*38fd1498Szrj 	{
1006*38fd1498Szrj 	  at_bb_start[bb->index] = modify_move_list (at_bb_start[bb->index]);
1007*38fd1498Szrj 	  insns = emit_move_list (at_bb_start[bb->index],
1008*38fd1498Szrj 				  REG_FREQ_FROM_BB (bb));
1009*38fd1498Szrj 	  tmp = BB_HEAD (bb);
1010*38fd1498Szrj 	  if (LABEL_P (tmp))
1011*38fd1498Szrj 	    tmp = NEXT_INSN (tmp);
1012*38fd1498Szrj 	  if (NOTE_INSN_BASIC_BLOCK_P (tmp))
1013*38fd1498Szrj 	    tmp = NEXT_INSN (tmp);
1014*38fd1498Szrj 	  if (tmp == BB_HEAD (bb))
1015*38fd1498Szrj 	    emit_insn_before (insns, tmp);
1016*38fd1498Szrj 	  else if (tmp != NULL_RTX)
1017*38fd1498Szrj 	    emit_insn_after (insns, PREV_INSN (tmp));
1018*38fd1498Szrj 	  else
1019*38fd1498Szrj 	    emit_insn_after (insns, get_last_insn ());
1020*38fd1498Szrj 	}
1021*38fd1498Szrj 
1022*38fd1498Szrj       if (at_bb_end[bb->index] != NULL)
1023*38fd1498Szrj 	{
1024*38fd1498Szrj 	  at_bb_end[bb->index] = modify_move_list (at_bb_end[bb->index]);
1025*38fd1498Szrj 	  insns = emit_move_list (at_bb_end[bb->index], REG_FREQ_FROM_BB (bb));
1026*38fd1498Szrj 	  ira_assert (! control_flow_insn_p (BB_END (bb)));
1027*38fd1498Szrj 	  emit_insn_after (insns, BB_END (bb));
1028*38fd1498Szrj 	}
1029*38fd1498Szrj 
1030*38fd1498Szrj       FOR_EACH_EDGE (e, ei, bb->succs)
1031*38fd1498Szrj 	{
1032*38fd1498Szrj 	  if (e->aux == NULL)
1033*38fd1498Szrj 	    continue;
1034*38fd1498Szrj 	  ira_assert ((e->flags & EDGE_ABNORMAL) == 0
1035*38fd1498Szrj 		      || ! EDGE_CRITICAL_P (e));
1036*38fd1498Szrj 	  e->aux = modify_move_list ((move_t) e->aux);
1037*38fd1498Szrj 	  insert_insn_on_edge
1038*38fd1498Szrj 	    (emit_move_list ((move_t) e->aux,
1039*38fd1498Szrj 			     REG_FREQ_FROM_EDGE_FREQ (EDGE_FREQUENCY (e))),
1040*38fd1498Szrj 	     e);
1041*38fd1498Szrj 	  if (e->src->next_bb != e->dest)
1042*38fd1498Szrj 	    ira_additional_jumps_num++;
1043*38fd1498Szrj 	}
1044*38fd1498Szrj     }
1045*38fd1498Szrj }
1046*38fd1498Szrj 
1047*38fd1498Szrj /* Update costs of A and corresponding allocnos on upper levels on the
1048*38fd1498Szrj    loop tree from reading (if READ_P) or writing A on an execution
1049*38fd1498Szrj    path with FREQ.  */
1050*38fd1498Szrj static void
update_costs(ira_allocno_t a,bool read_p,int freq)1051*38fd1498Szrj update_costs (ira_allocno_t a, bool read_p, int freq)
1052*38fd1498Szrj {
1053*38fd1498Szrj   ira_loop_tree_node_t parent;
1054*38fd1498Szrj 
1055*38fd1498Szrj   for (;;)
1056*38fd1498Szrj     {
1057*38fd1498Szrj       ALLOCNO_NREFS (a)++;
1058*38fd1498Szrj       ALLOCNO_FREQ (a) += freq;
1059*38fd1498Szrj       ALLOCNO_MEMORY_COST (a)
1060*38fd1498Szrj 	+= (ira_memory_move_cost[ALLOCNO_MODE (a)][ALLOCNO_CLASS (a)]
1061*38fd1498Szrj 	    [read_p ? 1 : 0] * freq);
1062*38fd1498Szrj       if (ALLOCNO_CAP (a) != NULL)
1063*38fd1498Szrj 	a = ALLOCNO_CAP (a);
1064*38fd1498Szrj       else if ((parent = ALLOCNO_LOOP_TREE_NODE (a)->parent) == NULL
1065*38fd1498Szrj 	       || (a = parent->regno_allocno_map[ALLOCNO_REGNO (a)]) == NULL)
1066*38fd1498Szrj 	break;
1067*38fd1498Szrj     }
1068*38fd1498Szrj }
1069*38fd1498Szrj 
/* Process moves from LIST with execution FREQ to add ranges, copies,
   and modify costs for allocnos involved in the moves.  All regnos
   living through the list is in LIVE_THROUGH, and the loop tree node
   used to find corresponding allocnos is NODE.  */
static void
add_range_and_copies_from_move_list (move_t list, ira_loop_tree_node_t node,
				     bitmap live_through, int freq)
{
  int start, n;
  unsigned int regno;
  move_t move;
  ira_allocno_t a;
  ira_copy_t cp;
  live_range_t r;
  bitmap_iterator bi;
  HARD_REG_SET hard_regs_live;

  if (list == NULL)
    return;
  /* N = number of pseudos living through the list; used below to
     size newly allocated conflict vectors.  */
  n = 0;
  EXECUTE_IF_SET_IN_BITMAP (live_through, FIRST_PSEUDO_REGISTER, regno, bi)
    n++;
  REG_SET_TO_HARD_REG_SET (hard_regs_live, live_through);
  /* This is a trick to guarantee that new ranges is not merged with
     the old ones.  */
  ira_max_point++;
  start = ira_max_point;
  for (move = list; move != NULL; move = move->next)
    {
      ira_allocno_t from = move->from;
      ira_allocno_t to = move->to;
      int nr, i;

      /* FROM and TO get explicit ranges below, so drop them from the
	 live-through set.  */
      bitmap_clear_bit (live_through, ALLOCNO_REGNO (from));
      bitmap_clear_bit (live_through, ALLOCNO_REGNO (to));

      nr = ALLOCNO_NUM_OBJECTS (to);
      for (i = 0; i < nr; i++)
	{
	  ira_object_t to_obj = ALLOCNO_OBJECT (to, i);
	  if (OBJECT_CONFLICT_ARRAY (to_obj) == NULL)
	    {
	      if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL)
		fprintf (ira_dump_file, "    Allocate conflicts for a%dr%d\n",
			 ALLOCNO_NUM (to), REGNO (allocno_emit_reg (to)));
	      ira_allocate_object_conflicts (to_obj, n);
	    }
	}
      ior_hard_reg_conflicts (from, &hard_regs_live);
      ior_hard_reg_conflicts (to, &hard_regs_live);

      /* The move reads FROM and writes TO with frequency FREQ.  */
      update_costs (from, true, freq);
      update_costs (to, false, freq);
      cp = ira_add_allocno_copy (from, to, freq, false, move->insn, NULL);
      if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL)
	fprintf (ira_dump_file, "    Adding cp%d:a%dr%d-a%dr%d\n",
		 cp->num, ALLOCNO_NUM (cp->first),
		 REGNO (allocno_emit_reg (cp->first)),
		 ALLOCNO_NUM (cp->second),
		 REGNO (allocno_emit_reg (cp->second)));

      /* Extend the live range of FROM up to the current point.  */
      nr = ALLOCNO_NUM_OBJECTS (from);
      for (i = 0; i < nr; i++)
	{
	  ira_object_t from_obj = ALLOCNO_OBJECT (from, i);
	  r = OBJECT_LIVE_RANGES (from_obj);
	  if (r == NULL || r->finish >= 0)
	    {
	      ira_add_live_range_to_object (from_obj, start, ira_max_point);
	      if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL)
		fprintf (ira_dump_file,
			 "    Adding range [%d..%d] to allocno a%dr%d\n",
			 start, ira_max_point, ALLOCNO_NUM (from),
			 REGNO (allocno_emit_reg (from)));
	    }
	  else
	    {
	      /* An open range (finish < 0) was started for FROM by an
		 earlier move in this list; close it here.  */
	      r->finish = ira_max_point;
	      if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL)
		fprintf (ira_dump_file,
			 "    Adding range [%d..%d] to allocno a%dr%d\n",
			 r->start, ira_max_point, ALLOCNO_NUM (from),
			 REGNO (allocno_emit_reg (from)));
	    }
	}
      ira_max_point++;
      /* Start an open-ended range for TO; a later move reading TO or
	 the fix-up loop below will close it.  */
      nr = ALLOCNO_NUM_OBJECTS (to);
      for (i = 0; i < nr; i++)
	{
	  ira_object_t to_obj = ALLOCNO_OBJECT (to, i);
	  ira_add_live_range_to_object (to_obj, ira_max_point, -1);
	}
      ira_max_point++;
    }
  /* Close any ranges left open after processing all moves.  */
  for (move = list; move != NULL; move = move->next)
    {
      int nr, i;
      nr = ALLOCNO_NUM_OBJECTS (move->to);
      for (i = 0; i < nr; i++)
	{
	  ira_object_t to_obj = ALLOCNO_OBJECT (move->to, i);
	  r = OBJECT_LIVE_RANGES (to_obj);
	  if (r->finish < 0)
	    {
	      r->finish = ira_max_point - 1;
	      if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL)
		fprintf (ira_dump_file,
			 "    Adding range [%d..%d] to allocno a%dr%d\n",
			 r->start, r->finish, ALLOCNO_NUM (move->to),
			 REGNO (allocno_emit_reg (move->to)));
	    }
	}
    }
  /* Pseudos living through the whole list conflict with all of it:
     give their allocnos a range spanning every point created
     above.  */
  EXECUTE_IF_SET_IN_BITMAP (live_through, FIRST_PSEUDO_REGISTER, regno, bi)
    {
      ira_allocno_t to;
      int nr, i;

      a = node->regno_allocno_map[regno];
      if ((to = ALLOCNO_EMIT_DATA (a)->mem_optimized_dest) != NULL)
	a = to;
      nr = ALLOCNO_NUM_OBJECTS (a);
      for (i = 0; i < nr; i++)
	{
	  ira_object_t obj = ALLOCNO_OBJECT (a, i);
	  ira_add_live_range_to_object (obj, start, ira_max_point - 1);
	}
      if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL)
	fprintf
	  (ira_dump_file,
	   "    Adding range [%d..%d] to live through %s allocno a%dr%d\n",
	   start, ira_max_point - 1,
	   to != NULL ? "upper level" : "",
	   ALLOCNO_NUM (a), REGNO (allocno_emit_reg (a)));
    }
}
1206*38fd1498Szrj 
1207*38fd1498Szrj /* Process all move list to add ranges, conflicts, copies, and modify
1208*38fd1498Szrj    costs for allocnos involved in the moves.  */
1209*38fd1498Szrj static void
add_ranges_and_copies(void)1210*38fd1498Szrj add_ranges_and_copies (void)
1211*38fd1498Szrj {
1212*38fd1498Szrj   basic_block bb;
1213*38fd1498Szrj   edge_iterator ei;
1214*38fd1498Szrj   edge e;
1215*38fd1498Szrj   ira_loop_tree_node_t node;
1216*38fd1498Szrj   bitmap live_through;
1217*38fd1498Szrj 
1218*38fd1498Szrj   live_through = ira_allocate_bitmap ();
1219*38fd1498Szrj   FOR_EACH_BB_FN (bb, cfun)
1220*38fd1498Szrj     {
1221*38fd1498Szrj       /* It does not matter what loop_tree_node (of source or
1222*38fd1498Szrj 	 destination block) to use for searching allocnos by their
1223*38fd1498Szrj 	 regnos because of subsequent IR flattening.  */
1224*38fd1498Szrj       node = IRA_BB_NODE (bb)->parent;
1225*38fd1498Szrj       bitmap_copy (live_through, df_get_live_in (bb));
1226*38fd1498Szrj       add_range_and_copies_from_move_list
1227*38fd1498Szrj 	(at_bb_start[bb->index], node, live_through, REG_FREQ_FROM_BB (bb));
1228*38fd1498Szrj       bitmap_copy (live_through, df_get_live_out (bb));
1229*38fd1498Szrj       add_range_and_copies_from_move_list
1230*38fd1498Szrj 	(at_bb_end[bb->index], node, live_through, REG_FREQ_FROM_BB (bb));
1231*38fd1498Szrj       FOR_EACH_EDGE (e, ei, bb->succs)
1232*38fd1498Szrj 	{
1233*38fd1498Szrj 	  bitmap_and (live_through,
1234*38fd1498Szrj 		      df_get_live_in (e->dest), df_get_live_out (bb));
1235*38fd1498Szrj 	  add_range_and_copies_from_move_list
1236*38fd1498Szrj 	    ((move_t) e->aux, node, live_through,
1237*38fd1498Szrj 	     REG_FREQ_FROM_EDGE_FREQ (EDGE_FREQUENCY (e)));
1238*38fd1498Szrj 	}
1239*38fd1498Szrj     }
1240*38fd1498Szrj   ira_free_bitmap (live_through);
1241*38fd1498Szrj }
1242*38fd1498Szrj 
/* The entry function changes code and generates shuffling allocnos on
   region borders for the regional (LOOPS_P is TRUE in this case)
   register allocation.  */
void
ira_emit (bool loops_p)
{
  basic_block bb;
  rtx_insn *insn;
  edge_iterator ei;
  edge e;
  ira_allocno_t a;
  ira_allocno_iterator ai;
  size_t sz;

  /* Initially every allocno is represented by its pseudo's rtx.  */
  FOR_EACH_ALLOCNO (a, ai)
    ALLOCNO_EMIT_DATA (a)->reg = regno_reg_rtx[ALLOCNO_REGNO (a)];
  /* With a single region no border moves are needed.  */
  if (! loops_p)
    return;
  sz = sizeof (move_t) * last_basic_block_for_fn (cfun);
  at_bb_start = (move_t *) ira_allocate (sz);
  memset (at_bb_start, 0, sz);
  at_bb_end = (move_t *) ira_allocate (sz);
  memset (at_bb_end, 0, sz);
  local_allocno_bitmap = ira_allocate_bitmap ();
  used_regno_bitmap = ira_allocate_bitmap ();
  renamed_regno_bitmap = ira_allocate_bitmap ();
  max_regno_before_changing = max_reg_num ();
  /* change_loop creates new pseudos (see set_allocno_reg calls above)
     for allocnos assigned differently in different regions.  */
  ira_traverse_loop_tree (true, ira_loop_tree_root, change_loop, NULL);
  set_allocno_somewhere_renamed_p ();
  ira_free_bitmap (used_regno_bitmap);
  ira_free_bitmap (renamed_regno_bitmap);
  ira_free_bitmap (local_allocno_bitmap);
  setup_entered_from_non_parent_p ();
  /* Attach shuffle move lists to non-exit successor edges.  */
  FOR_EACH_BB_FN (bb, cfun)
    {
      at_bb_start[bb->index] = NULL;
      at_bb_end[bb->index] = NULL;
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
	  generate_edge_moves (e);
    }
  allocno_last_set
    = (move_t *) ira_allocate (sizeof (move_t) * max_reg_num ());
  allocno_last_set_check
    = (int *) ira_allocate (sizeof (int) * max_reg_num ());
  memset (allocno_last_set_check, 0, sizeof (int) * max_reg_num ());
  memset (hard_regno_last_set_check, 0, sizeof (hard_regno_last_set_check));
  curr_tick = 0;
  /* Hoist identical edge move lists to block starts, then to block
     ends, to decrease duplication.  */
  FOR_EACH_BB_FN (bb, cfun)
    unify_moves (bb, true);
  FOR_EACH_BB_FN (bb, cfun)
    unify_moves (bb, false);
  move_vec.create (ira_allocnos_num);
  emit_moves ();
  add_ranges_and_copies ();
  /* Clean up: */
  FOR_EACH_BB_FN (bb, cfun)
    {
      free_move_list (at_bb_start[bb->index]);
      free_move_list (at_bb_end[bb->index]);
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  free_move_list ((move_t) e->aux);
	  e->aux = NULL;
	}
    }
  move_vec.release ();
  ira_free (allocno_last_set_check);
  ira_free (allocno_last_set);
  /* Materialize the insns queued on edges by emit_moves.  */
  commit_edge_insertions ();
  /* Fix insn codes.  It is necessary to do it before reload because
     reload assumes initial insn codes defined.  The insn codes can be
     invalidated by CFG infrastructure for example in jump
     redirection.  */
  FOR_EACH_BB_FN (bb, cfun)
    FOR_BB_INSNS_REVERSE (bb, insn)
      if (INSN_P (insn))
	recog_memoized (insn);
  ira_free (at_bb_end);
  ira_free (at_bb_start);
}
1324