1*38fd1498Szrj /* Integrated Register Allocator. Changing code and generating moves.
2*38fd1498Szrj Copyright (C) 2006-2018 Free Software Foundation, Inc.
3*38fd1498Szrj Contributed by Vladimir Makarov <vmakarov@redhat.com>.
4*38fd1498Szrj
5*38fd1498Szrj This file is part of GCC.
6*38fd1498Szrj
7*38fd1498Szrj GCC is free software; you can redistribute it and/or modify it under
8*38fd1498Szrj the terms of the GNU General Public License as published by the Free
9*38fd1498Szrj Software Foundation; either version 3, or (at your option) any later
10*38fd1498Szrj version.
11*38fd1498Szrj
12*38fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13*38fd1498Szrj WARRANTY; without even the implied warranty of MERCHANTABILITY or
14*38fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15*38fd1498Szrj for more details.
16*38fd1498Szrj
17*38fd1498Szrj You should have received a copy of the GNU General Public License
18*38fd1498Szrj along with GCC; see the file COPYING3. If not see
19*38fd1498Szrj <http://www.gnu.org/licenses/>. */
20*38fd1498Szrj
/* When we have more than one region, we need to change the original
   RTL code after coloring.  Let us consider two allocnos representing
   the same pseudo-register outside and inside a region respectively.
   They can get different hard-registers.  The reload pass works on a
   pseudo-register basis and there is no way to tell reload that a
   pseudo could be in different registers; it is even more difficult
   to say in which places of the code the pseudo should have
   particular hard-registers.  So in this case IRA has to create and
   use a new pseudo-register inside the region and add code to move
   allocno values on the region's borders.  This is done by the code
   in this file.
32*38fd1498Szrj
33*38fd1498Szrj The code makes top-down traversal of the regions and generate new
34*38fd1498Szrj pseudos and the move code on the region borders. In some
35*38fd1498Szrj complicated cases IRA can create a new pseudo used temporarily to
36*38fd1498Szrj move allocno values when a swap of values stored in two
37*38fd1498Szrj hard-registers is needed (e.g. two allocnos representing different
38*38fd1498Szrj pseudos outside region got respectively hard registers 1 and 2 and
39*38fd1498Szrj the corresponding allocnos inside the region got respectively hard
40*38fd1498Szrj registers 2 and 1). At this stage, the new pseudo is marked as
41*38fd1498Szrj spilled.
42*38fd1498Szrj
   IRA still creates the pseudo-register and the moves on the region
   borders even when both corresponding allocnos were assigned to the
   same hard-register.  It is done because, if the reload pass for
   some reason spills a pseudo-register representing the original
   pseudo outside or inside the region, the effect will be smaller
   because another pseudo will still be in the hard-register.  In most
   cases, this is better than spilling the original pseudo over its
   whole live-range.  If reload does not change the allocation for the
   two pseudo-registers, the trivial move will be removed by
   post-reload optimizations.
53*38fd1498Szrj
54*38fd1498Szrj IRA does not generate a new pseudo and moves for the allocno values
55*38fd1498Szrj if the both allocnos representing an original pseudo inside and
56*38fd1498Szrj outside region assigned to the same hard register when the register
57*38fd1498Szrj pressure in the region for the corresponding pressure class is less
58*38fd1498Szrj than number of available hard registers for given pressure class.
59*38fd1498Szrj
60*38fd1498Szrj IRA also does some optimizations to remove redundant moves which is
61*38fd1498Szrj transformed into stores by the reload pass on CFG edges
62*38fd1498Szrj representing exits from the region.
63*38fd1498Szrj
64*38fd1498Szrj IRA tries to reduce duplication of code generated on CFG edges
65*38fd1498Szrj which are enters and exits to/from regions by moving some code to
66*38fd1498Szrj the edge sources or destinations when it is possible. */
67*38fd1498Szrj
68*38fd1498Szrj #include "config.h"
69*38fd1498Szrj #include "system.h"
70*38fd1498Szrj #include "coretypes.h"
71*38fd1498Szrj #include "backend.h"
72*38fd1498Szrj #include "rtl.h"
73*38fd1498Szrj #include "tree.h"
74*38fd1498Szrj #include "predict.h"
75*38fd1498Szrj #include "df.h"
76*38fd1498Szrj #include "insn-config.h"
77*38fd1498Szrj #include "regs.h"
78*38fd1498Szrj #include "memmodel.h"
79*38fd1498Szrj #include "ira.h"
80*38fd1498Szrj #include "ira-int.h"
81*38fd1498Szrj #include "cfgrtl.h"
82*38fd1498Szrj #include "cfgbuild.h"
83*38fd1498Szrj #include "expr.h"
84*38fd1498Szrj #include "reload.h"
85*38fd1498Szrj #include "cfgloop.h"
86*38fd1498Szrj
87*38fd1498Szrj
88*38fd1498Szrj /* Data used to emit live range split insns and to flattening IR. */
89*38fd1498Szrj ira_emit_data_t ira_allocno_emit_data;
90*38fd1498Szrj
91*38fd1498Szrj /* Definitions for vectors of pointers. */
92*38fd1498Szrj typedef void *void_p;
93*38fd1498Szrj
94*38fd1498Szrj /* Pointers to data allocated for allocnos being created during
95*38fd1498Szrj emitting. Usually there are quite few such allocnos because they
96*38fd1498Szrj are created only for resolving loop in register shuffling. */
97*38fd1498Szrj static vec<void_p> new_allocno_emit_data_vec;
98*38fd1498Szrj
99*38fd1498Szrj /* Allocate and initiate the emit data. */
100*38fd1498Szrj void
ira_initiate_emit_data(void)101*38fd1498Szrj ira_initiate_emit_data (void)
102*38fd1498Szrj {
103*38fd1498Szrj ira_allocno_t a;
104*38fd1498Szrj ira_allocno_iterator ai;
105*38fd1498Szrj
106*38fd1498Szrj ira_allocno_emit_data
107*38fd1498Szrj = (ira_emit_data_t) ira_allocate (ira_allocnos_num
108*38fd1498Szrj * sizeof (struct ira_emit_data));
109*38fd1498Szrj memset (ira_allocno_emit_data, 0,
110*38fd1498Szrj ira_allocnos_num * sizeof (struct ira_emit_data));
111*38fd1498Szrj FOR_EACH_ALLOCNO (a, ai)
112*38fd1498Szrj ALLOCNO_ADD_DATA (a) = ira_allocno_emit_data + ALLOCNO_NUM (a);
113*38fd1498Szrj new_allocno_emit_data_vec.create (50);
114*38fd1498Szrj
115*38fd1498Szrj }
116*38fd1498Szrj
117*38fd1498Szrj /* Free the emit data. */
118*38fd1498Szrj void
ira_finish_emit_data(void)119*38fd1498Szrj ira_finish_emit_data (void)
120*38fd1498Szrj {
121*38fd1498Szrj void_p p;
122*38fd1498Szrj ira_allocno_t a;
123*38fd1498Szrj ira_allocno_iterator ai;
124*38fd1498Szrj
125*38fd1498Szrj ira_free (ira_allocno_emit_data);
126*38fd1498Szrj FOR_EACH_ALLOCNO (a, ai)
127*38fd1498Szrj ALLOCNO_ADD_DATA (a) = NULL;
128*38fd1498Szrj for (;new_allocno_emit_data_vec.length () != 0;)
129*38fd1498Szrj {
130*38fd1498Szrj p = new_allocno_emit_data_vec.pop ();
131*38fd1498Szrj ira_free (p);
132*38fd1498Szrj }
133*38fd1498Szrj new_allocno_emit_data_vec.release ();
134*38fd1498Szrj }
135*38fd1498Szrj
136*38fd1498Szrj /* Create and return a new allocno with given REGNO and
137*38fd1498Szrj LOOP_TREE_NODE. Allocate emit data for it. */
138*38fd1498Szrj static ira_allocno_t
create_new_allocno(int regno,ira_loop_tree_node_t loop_tree_node)139*38fd1498Szrj create_new_allocno (int regno, ira_loop_tree_node_t loop_tree_node)
140*38fd1498Szrj {
141*38fd1498Szrj ira_allocno_t a;
142*38fd1498Szrj
143*38fd1498Szrj a = ira_create_allocno (regno, false, loop_tree_node);
144*38fd1498Szrj ALLOCNO_ADD_DATA (a) = ira_allocate (sizeof (struct ira_emit_data));
145*38fd1498Szrj memset (ALLOCNO_ADD_DATA (a), 0, sizeof (struct ira_emit_data));
146*38fd1498Szrj new_allocno_emit_data_vec.safe_push (ALLOCNO_ADD_DATA (a));
147*38fd1498Szrj return a;
148*38fd1498Szrj }
149*38fd1498Szrj
150*38fd1498Szrj
151*38fd1498Szrj
/* Forward declaration; see the structure definition below.  */
typedef struct move *move_t;

/* The structure represents an allocno move.  Both allocnos have the
   same original regno but different allocation.  */
struct move
{
  /* The allocnos involved in the move; TO receives the value of FROM.  */
  ira_allocno_t from, to;
  /* The next move in the move sequence.  */
  move_t next;
  /* Used for finding dependencies.  */
  bool visited_p;
  /* The size of the following array.  */
  int deps_num;
  /* Moves on which the given move depends.  Dependencies can be
     cyclic; in that case we need a temporary to generate the moves.
     The sequence A1->A2, B1->B2 where A1 and B2 are assigned to reg
     R1 and A2 and B1 are assigned to reg R2 is an example of cyclic
     dependencies.  */
  move_t *deps;
  /* First insn generated for the move.  */
  rtx_insn *insn;
};
176*38fd1498Szrj
/* Arrays of move lists (indexed by BB index) which should be put at
   the start/end of the corresponding basic blocks.  Filled by
   unify_moves when all edges into (out of) a block carry equal move
   lists.  */
static move_t *at_bb_start, *at_bb_end;

/* Max regno before renaming some pseudo-registers.  For example, the
   same pseudo-register can be renamed in a loop if its allocation is
   different outside the loop.  Regnos at or above this value were
   created by the renaming and must not be changed again (see
   change_regs).  */
static int max_regno_before_changing;
185*38fd1498Szrj
186*38fd1498Szrj /* Return new move of allocnos TO and FROM. */
187*38fd1498Szrj static move_t
create_move(ira_allocno_t to,ira_allocno_t from)188*38fd1498Szrj create_move (ira_allocno_t to, ira_allocno_t from)
189*38fd1498Szrj {
190*38fd1498Szrj move_t move;
191*38fd1498Szrj
192*38fd1498Szrj move = (move_t) ira_allocate (sizeof (struct move));
193*38fd1498Szrj move->deps = NULL;
194*38fd1498Szrj move->deps_num = 0;
195*38fd1498Szrj move->to = to;
196*38fd1498Szrj move->from = from;
197*38fd1498Szrj move->next = NULL;
198*38fd1498Szrj move->insn = NULL;
199*38fd1498Szrj move->visited_p = false;
200*38fd1498Szrj return move;
201*38fd1498Szrj }
202*38fd1498Szrj
203*38fd1498Szrj /* Free memory for MOVE and its dependencies. */
204*38fd1498Szrj static void
free_move(move_t move)205*38fd1498Szrj free_move (move_t move)
206*38fd1498Szrj {
207*38fd1498Szrj if (move->deps != NULL)
208*38fd1498Szrj ira_free (move->deps);
209*38fd1498Szrj ira_free (move);
210*38fd1498Szrj }
211*38fd1498Szrj
212*38fd1498Szrj /* Free memory for list of the moves given by its HEAD. */
213*38fd1498Szrj static void
free_move_list(move_t head)214*38fd1498Szrj free_move_list (move_t head)
215*38fd1498Szrj {
216*38fd1498Szrj move_t next;
217*38fd1498Szrj
218*38fd1498Szrj for (; head != NULL; head = next)
219*38fd1498Szrj {
220*38fd1498Szrj next = head->next;
221*38fd1498Szrj free_move (head);
222*38fd1498Szrj }
223*38fd1498Szrj }
224*38fd1498Szrj
225*38fd1498Szrj /* Return TRUE if the move list LIST1 and LIST2 are equal (two
226*38fd1498Szrj moves are equal if they involve the same allocnos). */
227*38fd1498Szrj static bool
eq_move_lists_p(move_t list1,move_t list2)228*38fd1498Szrj eq_move_lists_p (move_t list1, move_t list2)
229*38fd1498Szrj {
230*38fd1498Szrj for (; list1 != NULL && list2 != NULL;
231*38fd1498Szrj list1 = list1->next, list2 = list2->next)
232*38fd1498Szrj if (list1->from != list2->from || list1->to != list2->to)
233*38fd1498Szrj return false;
234*38fd1498Szrj return list1 == list2;
235*38fd1498Szrj }
236*38fd1498Szrj
237*38fd1498Szrj /* Print move list LIST into file F. */
238*38fd1498Szrj static void
print_move_list(FILE * f,move_t list)239*38fd1498Szrj print_move_list (FILE *f, move_t list)
240*38fd1498Szrj {
241*38fd1498Szrj for (; list != NULL; list = list->next)
242*38fd1498Szrj fprintf (f, " a%dr%d->a%dr%d",
243*38fd1498Szrj ALLOCNO_NUM (list->from), ALLOCNO_REGNO (list->from),
244*38fd1498Szrj ALLOCNO_NUM (list->to), ALLOCNO_REGNO (list->to));
245*38fd1498Szrj fprintf (f, "\n");
246*38fd1498Szrj }
247*38fd1498Szrj
/* Declared here (not in a header) because it is intended to be called
   from a debugger only.  */
extern void ira_debug_move_list (move_t list);

/* Print move list LIST into stderr.  */
void
ira_debug_move_list (move_t list)
{
  print_move_list (stderr, list);
}
256*38fd1498Szrj
257*38fd1498Szrj /* This recursive function changes pseudo-registers in *LOC if it is
258*38fd1498Szrj necessary. The function returns TRUE if a change was done. */
259*38fd1498Szrj static bool
change_regs(rtx * loc)260*38fd1498Szrj change_regs (rtx *loc)
261*38fd1498Szrj {
262*38fd1498Szrj int i, regno, result = false;
263*38fd1498Szrj const char *fmt;
264*38fd1498Szrj enum rtx_code code;
265*38fd1498Szrj rtx reg;
266*38fd1498Szrj
267*38fd1498Szrj if (*loc == NULL_RTX)
268*38fd1498Szrj return false;
269*38fd1498Szrj code = GET_CODE (*loc);
270*38fd1498Szrj if (code == REG)
271*38fd1498Szrj {
272*38fd1498Szrj regno = REGNO (*loc);
273*38fd1498Szrj if (regno < FIRST_PSEUDO_REGISTER)
274*38fd1498Szrj return false;
275*38fd1498Szrj if (regno >= max_regno_before_changing)
276*38fd1498Szrj /* It is a shared register which was changed already. */
277*38fd1498Szrj return false;
278*38fd1498Szrj if (ira_curr_regno_allocno_map[regno] == NULL)
279*38fd1498Szrj return false;
280*38fd1498Szrj reg = allocno_emit_reg (ira_curr_regno_allocno_map[regno]);
281*38fd1498Szrj if (reg == *loc)
282*38fd1498Szrj return false;
283*38fd1498Szrj *loc = reg;
284*38fd1498Szrj return true;
285*38fd1498Szrj }
286*38fd1498Szrj
287*38fd1498Szrj fmt = GET_RTX_FORMAT (code);
288*38fd1498Szrj for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
289*38fd1498Szrj {
290*38fd1498Szrj if (fmt[i] == 'e')
291*38fd1498Szrj result = change_regs (&XEXP (*loc, i)) || result;
292*38fd1498Szrj else if (fmt[i] == 'E')
293*38fd1498Szrj {
294*38fd1498Szrj int j;
295*38fd1498Szrj
296*38fd1498Szrj for (j = XVECLEN (*loc, i) - 1; j >= 0; j--)
297*38fd1498Szrj result = change_regs (&XVECEXP (*loc, i, j)) || result;
298*38fd1498Szrj }
299*38fd1498Szrj }
300*38fd1498Szrj return result;
301*38fd1498Szrj }
302*38fd1498Szrj
303*38fd1498Szrj static bool
change_regs_in_insn(rtx_insn ** insn_ptr)304*38fd1498Szrj change_regs_in_insn (rtx_insn **insn_ptr)
305*38fd1498Szrj {
306*38fd1498Szrj rtx rtx = *insn_ptr;
307*38fd1498Szrj bool result = change_regs (&rtx);
308*38fd1498Szrj *insn_ptr = as_a <rtx_insn *> (rtx);
309*38fd1498Szrj return result;
310*38fd1498Szrj }
311*38fd1498Szrj
312*38fd1498Szrj /* Attach MOVE to the edge E. The move is attached to the head of the
313*38fd1498Szrj list if HEAD_P is TRUE. */
314*38fd1498Szrj static void
add_to_edge_list(edge e,move_t move,bool head_p)315*38fd1498Szrj add_to_edge_list (edge e, move_t move, bool head_p)
316*38fd1498Szrj {
317*38fd1498Szrj move_t last;
318*38fd1498Szrj
319*38fd1498Szrj if (head_p || e->aux == NULL)
320*38fd1498Szrj {
321*38fd1498Szrj move->next = (move_t) e->aux;
322*38fd1498Szrj e->aux = move;
323*38fd1498Szrj }
324*38fd1498Szrj else
325*38fd1498Szrj {
326*38fd1498Szrj for (last = (move_t) e->aux; last->next != NULL; last = last->next)
327*38fd1498Szrj ;
328*38fd1498Szrj last->next = move;
329*38fd1498Szrj move->next = NULL;
330*38fd1498Szrj }
331*38fd1498Szrj }
332*38fd1498Szrj
/* Create and return a new pseudo-register with the same attributes as
   ORIGINAL_REG.  The new reg shares ORIGINAL_REGNO, user-variable and
   pointer flags, and attributes with the original; the reg-equiv
   array is grown to cover the new regno.  */
rtx
ira_create_new_reg (rtx original_reg)
{
  rtx new_reg;

  new_reg = gen_reg_rtx (GET_MODE (original_reg));
  /* Copy over every attribute reload and debug output rely on.  */
  ORIGINAL_REGNO (new_reg) = ORIGINAL_REGNO (original_reg);
  REG_USERVAR_P (new_reg) = REG_USERVAR_P (original_reg);
  REG_POINTER (new_reg) = REG_POINTER (original_reg);
  REG_ATTRS (new_reg) = REG_ATTRS (original_reg);
  if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
    fprintf (ira_dump_file, "      Creating newreg=%i from oldreg=%i\n",
	     REGNO (new_reg), REGNO (original_reg));
  /* Make sure the equivalence data structures cover the new regno.  */
  ira_expand_reg_equiv ();
  return new_reg;
}
351*38fd1498Szrj
352*38fd1498Szrj /* Return TRUE if loop given by SUBNODE inside the loop given by
353*38fd1498Szrj NODE. */
354*38fd1498Szrj static bool
subloop_tree_node_p(ira_loop_tree_node_t subnode,ira_loop_tree_node_t node)355*38fd1498Szrj subloop_tree_node_p (ira_loop_tree_node_t subnode, ira_loop_tree_node_t node)
356*38fd1498Szrj {
357*38fd1498Szrj for (; subnode != NULL; subnode = subnode->parent)
358*38fd1498Szrj if (subnode == node)
359*38fd1498Szrj return true;
360*38fd1498Szrj return false;
361*38fd1498Szrj }
362*38fd1498Szrj
/* Set up member `reg' to REG for allocnos which have the same regno as
   ALLOCNO and which are inside the loop corresponding to ALLOCNO.
   Also mark, in all ancestor allocnos of the same regno, that a child
   was renamed (CHILD_RENAMED_P).  */
static void
set_allocno_reg (ira_allocno_t allocno, rtx reg)
{
  int regno;
  ira_allocno_t a;
  ira_loop_tree_node_t node;

  node = ALLOCNO_LOOP_TREE_NODE (allocno);
  /* Rename every allocno of the same original regno living in this
     loop or any subloop.  */
  for (a = ira_regno_allocno_map[ALLOCNO_REGNO (allocno)];
       a != NULL;
       a = ALLOCNO_NEXT_REGNO_ALLOCNO (a))
    if (subloop_tree_node_p (ALLOCNO_LOOP_TREE_NODE (a), node))
      ALLOCNO_EMIT_DATA (a)->reg = reg;
  /* Caps of ALLOCNO in the enclosing loops refer to the same value,
     so rename them too.  */
  for (a = ALLOCNO_CAP (allocno); a != NULL; a = ALLOCNO_CAP (a))
    ALLOCNO_EMIT_DATA (a)->reg = reg;
  regno = ALLOCNO_REGNO (allocno);
  /* Walk up through caps and parent loop allocnos, setting
     CHILD_RENAMED_P until we meet an ancestor already marked.  */
  for (a = allocno;;)
    {
      if (a == NULL || (a = ALLOCNO_CAP (a)) == NULL)
	{
	  node = node->parent;
	  if (node == NULL)
	    break;
	  a = node->regno_allocno_map[regno];
	}
      if (a == NULL)
	continue;
      if (ALLOCNO_EMIT_DATA (a)->child_renamed_p)
	break;
      ALLOCNO_EMIT_DATA (a)->child_renamed_p = true;
    }
}
397*38fd1498Szrj
/* Return true if there is an entry to given loop not from its parent
   (or grandparent) block.  For example, it is possible for two
   adjacent loops inside another loop.  */
static bool
entered_from_non_parent_p (ira_loop_tree_node_t loop_node)
{
  ira_loop_tree_node_t bb_node, src_loop_node, parent;
  edge e;
  edge_iterator ei;

  /* Scan the immediate basic-block children of the loop; any edge
     entering one of them from outside is an entry to the loop.  */
  for (bb_node = loop_node->children;
       bb_node != NULL;
       bb_node = bb_node->next)
    if (bb_node->bb != NULL)
      {
	FOR_EACH_EDGE (e, ei, bb_node->bb->preds)
	  if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
	      && (src_loop_node = IRA_BB_NODE (e->src)->parent) != loop_node)
	    {
	      /* Is the source block inside a loop nested in
		 LOOP_NODE?  */
	      for (parent = src_loop_node->parent;
		   parent != NULL;
		   parent = parent->parent)
		if (parent == loop_node)
		  break;
	      if (parent != NULL)
		/* That is an exit from a nested loop -- skip it.  */
		continue;
	      /* Is the source block in an ancestor (parent or
		 grandparent) of LOOP_NODE?  */
	      for (parent = loop_node->parent;
		   parent != NULL;
		   parent = parent->parent)
		if (src_loop_node == parent)
		  break;
	      if (parent == NULL)
		/* Entry from a non-ancestor region.  */
		return true;
	    }
      }
  return false;
}
436*38fd1498Szrj
/* Set up ENTERED_FROM_NON_PARENT_P for each loop region.  Only loop
   nodes that actually form regions (non-null regno_allocno_map) are
   considered.  */
static void
setup_entered_from_non_parent_p (void)
{
  unsigned int i;
  loop_p loop;

  ira_assert (current_loops != NULL);
  FOR_EACH_VEC_SAFE_ELT (get_loops (cfun), i, loop)
    if (ira_loop_nodes[i].regno_allocno_map != NULL)
      ira_loop_nodes[i].entered_from_non_parent_p
	= entered_from_non_parent_p (&ira_loop_nodes[i]);
}
450*38fd1498Szrj
/* Return TRUE if move of SRC_ALLOCNO (assigned to hard register) to
   DEST_ALLOCNO (assigned to memory) can be removed because it does
   not change value of the destination.  One possible reason for this
   is the situation when SRC_ALLOCNO is not modified in the
   corresponding loop.  */
static bool
store_can_be_removed_p (ira_allocno_t src_allocno, ira_allocno_t dest_allocno)
{
  int regno, orig_regno;
  ira_allocno_t a;
  ira_loop_tree_node_t node;

  ira_assert (ALLOCNO_CAP_MEMBER (src_allocno) == NULL
	      && ALLOCNO_CAP_MEMBER (dest_allocno) == NULL);
  orig_regno = ALLOCNO_REGNO (src_allocno);
  regno = REGNO (allocno_emit_reg (dest_allocno));
  /* Walk from the source loop up towards the root, checking the
     allocno of the same original regno at each level.  */
  for (node = ALLOCNO_LOOP_TREE_NODE (src_allocno);
       node != NULL;
       node = node->parent)
    {
      a = node->regno_allocno_map[orig_regno];
      ira_assert (a != NULL);
      if (REGNO (allocno_emit_reg (a)) == (unsigned) regno)
	/* We achieved the destination and everything is ok.  */
	return true;
      else if (bitmap_bit_p (node->modified_regnos, orig_regno))
	/* The pseudo may be modified on this level -- keep the store.  */
	return false;
      else if (node->entered_from_non_parent_p)
	/* If there is a path from a destination loop block to the
	   source loop header containing basic blocks of non-parents
	   (grandparents) of the source loop, we should have checked
	   modifications of the pseudo on this path too to decide
	   about possibility to remove the store.  It could be done by
	   solving a data-flow problem.  Unfortunately such global
	   solution would complicate IR flattening.  Therefore we just
	   prohibit removal of the store in such complicated case.  */
	return false;
    }
  /* It is actually a loop entry -- do not remove the store.  */
  return false;
}
492*38fd1498Szrj
/* Generate and attach moves to the edge E.  This looks at the final
   regnos of allocnos living on the edge with the same original regno
   to figure out when moves should be generated.  */
static void
generate_edge_moves (edge e)
{
  ira_loop_tree_node_t src_loop_node, dest_loop_node;
  unsigned int regno;
  bitmap_iterator bi;
  ira_allocno_t src_allocno, dest_allocno, *src_map, *dest_map;
  move_t move;
  bitmap regs_live_in_dest, regs_live_out_src;

  src_loop_node = IRA_BB_NODE (e->src)->parent;
  dest_loop_node = IRA_BB_NODE (e->dest)->parent;
  e->aux = NULL;
  /* Moves are only needed when the edge crosses a region border.  */
  if (src_loop_node == dest_loop_node)
    return;
  src_map = src_loop_node->regno_allocno_map;
  dest_map = dest_loop_node->regno_allocno_map;
  regs_live_in_dest = df_get_live_in (e->dest);
  regs_live_out_src = df_get_live_out (e->src);
  /* Consider each pseudo live across the edge.  */
  EXECUTE_IF_SET_IN_REG_SET (regs_live_in_dest,
			     FIRST_PSEUDO_REGISTER, regno, bi)
    if (bitmap_bit_p (regs_live_out_src, regno))
      {
	src_allocno = src_map[regno];
	dest_allocno = dest_map[regno];
	/* Same emit register on both sides -- no move required.  */
	if (REGNO (allocno_emit_reg (src_allocno))
	    == REGNO (allocno_emit_reg (dest_allocno)))
	  continue;
	/* Remove unnecessary stores at the region exit.  We should do
	   this for readonly memory for sure and this is guaranteed by
	   that we never generate moves on region borders (see
	   checking in function change_loop).  */
	if (ALLOCNO_HARD_REGNO (dest_allocno) < 0
	    && ALLOCNO_HARD_REGNO (src_allocno) >= 0
	    && store_can_be_removed_p (src_allocno, dest_allocno))
	  {
	    ALLOCNO_EMIT_DATA (src_allocno)->mem_optimized_dest = dest_allocno;
	    ALLOCNO_EMIT_DATA (dest_allocno)->mem_optimized_dest_p = true;
	    if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
	      fprintf (ira_dump_file, "      Remove r%d:a%d->a%d(mem)\n",
		       regno, ALLOCNO_NUM (src_allocno),
		       ALLOCNO_NUM (dest_allocno));
	    continue;
	  }
	move = create_move (dest_allocno, src_allocno);
	add_to_edge_list (e, move, true);
    }
}
544*38fd1498Szrj
/* Bitmap of allocnos local for the current loop.  */
static bitmap local_allocno_bitmap;

/* This bitmap is used to find that we need to generate and to use a
   new pseudo-register when processing allocnos with the same original
   regno.  A regno is set here the first time it is seen; a second
   sighting (in another loop) forces a rename.  */
static bitmap used_regno_bitmap;

/* This bitmap contains regnos of allocnos which were renamed locally
   because the allocnos correspond to disjoint live ranges in loops
   with a common parent.  */
static bitmap renamed_regno_bitmap;
557*38fd1498Szrj
/* Change (if necessary) pseudo-registers inside loop given by loop
   tree node NODE.  For a BB node this rewrites the insns; for a loop
   node this decides, per border allocno, whether a new pseudo must be
   created, and finally renames local allocnos whose regno was already
   used in another loop.  */
static void
change_loop (ira_loop_tree_node_t node)
{
  bitmap_iterator bi;
  unsigned int i;
  int regno;
  bool used_p;
  ira_allocno_t allocno, parent_allocno, *map;
  rtx_insn *insn;
  rtx original_reg;
  enum reg_class aclass, pclass;
  ira_loop_tree_node_t parent;

  if (node != ira_loop_tree_root)
    {
      ira_assert (current_loops != NULL);

      if (node->bb != NULL)
	{
	  /* A basic-block node: rewrite registers in each insn and let
	     DF know about any change.  */
	  FOR_BB_INSNS (node->bb, insn)
	    if (INSN_P (insn) && change_regs_in_insn (&insn))
	      {
		df_insn_rescan (insn);
		df_notes_rescan (insn);
	      }
	  return;
	}

      if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
	fprintf (ira_dump_file,
		 "      Changing RTL for loop %d (header bb%d)\n",
		 node->loop_num, node->loop->header->index);

      parent = ira_curr_loop_tree_node->parent;
      map = parent->regno_allocno_map;
      EXECUTE_IF_SET_IN_REG_SET (ira_curr_loop_tree_node->border_allocnos,
				 0, i, bi)
	{
	  allocno = ira_allocnos[i];
	  regno = ALLOCNO_REGNO (allocno);
	  aclass = ALLOCNO_CLASS (allocno);
	  pclass = ira_pressure_class_translate[aclass];
	  parent_allocno = map[regno];
	  ira_assert (regno < ira_reg_equiv_len);
	  /* We generate the same hard register move because the
	     reload pass can put an allocno into memory in this case
	     we will have live range splitting.  If it does not happen
	     such the same hard register moves will be removed.  The
	     worst case when the both allocnos are put into memory by
	     the reload is very rare.  */
	  if (parent_allocno != NULL
	      && (ALLOCNO_HARD_REGNO (allocno)
		  == ALLOCNO_HARD_REGNO (parent_allocno))
	      && (ALLOCNO_HARD_REGNO (allocno) < 0
		  || (parent->reg_pressure[pclass] + 1
		      <= ira_class_hard_regs_num[pclass])
		  || TEST_HARD_REG_BIT (ira_prohibited_mode_move_regs
					[ALLOCNO_MODE (allocno)],
					ALLOCNO_HARD_REGNO (allocno))
		  /* don't create copies because reload can spill an
		     allocno set by copy although the allocno will not
		     get memory slot.  */
		  || ira_equiv_no_lvalue_p (regno)
		  || (pic_offset_table_rtx != NULL
		      && (ALLOCNO_REGNO (allocno)
			  == (int) REGNO (pic_offset_table_rtx)))))
	    continue;
	  original_reg = allocno_emit_reg (allocno);
	  if (parent_allocno == NULL
	      || (REGNO (allocno_emit_reg (parent_allocno))
		  == REGNO (original_reg)))
	    {
	      if (internal_flag_ira_verbose > 3 && ira_dump_file)
		fprintf (ira_dump_file, "  %i vs parent %i:",
			 ALLOCNO_HARD_REGNO (allocno),
			 ALLOCNO_HARD_REGNO (parent_allocno));
	      set_allocno_reg (allocno, ira_create_new_reg (original_reg));
	    }
	}
    }
  /* Rename locals: Local allocnos with same regno in different loops
     might get the different hard register.  So we need to change
     ALLOCNO_REG.  */
  bitmap_and_compl (local_allocno_bitmap,
		    ira_curr_loop_tree_node->all_allocnos,
		    ira_curr_loop_tree_node->border_allocnos);
  EXECUTE_IF_SET_IN_REG_SET (local_allocno_bitmap, 0, i, bi)
    {
      allocno = ira_allocnos[i];
      regno = ALLOCNO_REGNO (allocno);
      if (ALLOCNO_CAP_MEMBER (allocno) != NULL)
	continue;
      /* bitmap_set_bit returns true only on the first sighting of
	 REGNO; USED_P means it was already seen in another loop.  */
      used_p = !bitmap_set_bit (used_regno_bitmap, regno);
      ALLOCNO_EMIT_DATA (allocno)->somewhere_renamed_p = true;
      if (! used_p)
	continue;
      bitmap_set_bit (renamed_regno_bitmap, regno);
      set_allocno_reg (allocno, ira_create_new_reg (allocno_emit_reg (allocno)));
    }
}
660*38fd1498Szrj
661*38fd1498Szrj /* Process to set up flag somewhere_renamed_p. */
662*38fd1498Szrj static void
set_allocno_somewhere_renamed_p(void)663*38fd1498Szrj set_allocno_somewhere_renamed_p (void)
664*38fd1498Szrj {
665*38fd1498Szrj unsigned int regno;
666*38fd1498Szrj ira_allocno_t allocno;
667*38fd1498Szrj ira_allocno_iterator ai;
668*38fd1498Szrj
669*38fd1498Szrj FOR_EACH_ALLOCNO (allocno, ai)
670*38fd1498Szrj {
671*38fd1498Szrj regno = ALLOCNO_REGNO (allocno);
672*38fd1498Szrj if (bitmap_bit_p (renamed_regno_bitmap, regno)
673*38fd1498Szrj && REGNO (allocno_emit_reg (allocno)) == regno)
674*38fd1498Szrj ALLOCNO_EMIT_DATA (allocno)->somewhere_renamed_p = true;
675*38fd1498Szrj }
676*38fd1498Szrj }
677*38fd1498Szrj
678*38fd1498Szrj /* Return TRUE if move lists on all edges given in vector VEC are
679*38fd1498Szrj equal. */
680*38fd1498Szrj static bool
eq_edge_move_lists_p(vec<edge,va_gc> * vec)681*38fd1498Szrj eq_edge_move_lists_p (vec<edge, va_gc> *vec)
682*38fd1498Szrj {
683*38fd1498Szrj move_t list;
684*38fd1498Szrj int i;
685*38fd1498Szrj
686*38fd1498Szrj list = (move_t) EDGE_I (vec, 0)->aux;
687*38fd1498Szrj for (i = EDGE_COUNT (vec) - 1; i > 0; i--)
688*38fd1498Szrj if (! eq_move_lists_p (list, (move_t) EDGE_I (vec, i)->aux))
689*38fd1498Szrj return false;
690*38fd1498Szrj return true;
691*38fd1498Szrj }
692*38fd1498Szrj
693*38fd1498Szrj /* Look at all entry edges (if START_P) or exit edges of basic block
694*38fd1498Szrj BB and put move lists at the BB start or end if it is possible. In
695*38fd1498Szrj other words, this decreases code duplication of allocno moves. */
696*38fd1498Szrj static void
unify_moves(basic_block bb,bool start_p)697*38fd1498Szrj unify_moves (basic_block bb, bool start_p)
698*38fd1498Szrj {
699*38fd1498Szrj int i;
700*38fd1498Szrj edge e;
701*38fd1498Szrj move_t list;
702*38fd1498Szrj vec<edge, va_gc> *vec;
703*38fd1498Szrj
704*38fd1498Szrj vec = (start_p ? bb->preds : bb->succs);
705*38fd1498Szrj if (EDGE_COUNT (vec) == 0 || ! eq_edge_move_lists_p (vec))
706*38fd1498Szrj return;
707*38fd1498Szrj e = EDGE_I (vec, 0);
708*38fd1498Szrj list = (move_t) e->aux;
709*38fd1498Szrj if (! start_p && control_flow_insn_p (BB_END (bb)))
710*38fd1498Szrj return;
711*38fd1498Szrj e->aux = NULL;
712*38fd1498Szrj for (i = EDGE_COUNT (vec) - 1; i > 0; i--)
713*38fd1498Szrj {
714*38fd1498Szrj e = EDGE_I (vec, i);
715*38fd1498Szrj free_move_list ((move_t) e->aux);
716*38fd1498Szrj e->aux = NULL;
717*38fd1498Szrj }
718*38fd1498Szrj if (start_p)
719*38fd1498Szrj at_bb_start[bb->index] = list;
720*38fd1498Szrj else
721*38fd1498Szrj at_bb_end[bb->index] = list;
722*38fd1498Szrj }
723*38fd1498Szrj
/* Last move (in move sequence being processed) setting up the
   corresponding hard register.  */
static move_t hard_regno_last_set[FIRST_PSEUDO_REGISTER];

/* If the element value is equal to CURR_TICK then the corresponding
   element in `hard_regno_last_set' is defined and correct.  */
static int hard_regno_last_set_check[FIRST_PSEUDO_REGISTER];

/* Last move (in move sequence being processed) setting up the
   corresponding allocno.  */
static move_t *allocno_last_set;

/* If the element value is equal to CURR_TICK then the corresponding
   element in `allocno_last_set' is defined and correct.  */
static int *allocno_last_set_check;

/* Definition of vector of moves.  */

/* This vec contains moves sorted topologically (depth-first) on their
   dependency graph.  */
static vec<move_t> move_vec;

/* The variable value is used to check correctness of values of
   elements of arrays `hard_regno_last_set_check' and
   `allocno_last_set_check'.  */
static int curr_tick;
750*38fd1498Szrj
751*38fd1498Szrj /* This recursive function traverses dependencies of MOVE and produces
752*38fd1498Szrj topological sorting (in depth-first order). */
753*38fd1498Szrj static void
traverse_moves(move_t move)754*38fd1498Szrj traverse_moves (move_t move)
755*38fd1498Szrj {
756*38fd1498Szrj int i;
757*38fd1498Szrj
758*38fd1498Szrj if (move->visited_p)
759*38fd1498Szrj return;
760*38fd1498Szrj move->visited_p = true;
761*38fd1498Szrj for (i = move->deps_num - 1; i >= 0; i--)
762*38fd1498Szrj traverse_moves (move->deps[i]);
763*38fd1498Szrj move_vec.safe_push (move);
764*38fd1498Szrj }
765*38fd1498Szrj
/* Remove unnecessary moves in the LIST, makes topological sorting,
   and removes cycles on hard reg dependencies by introducing new
   allocnos assigned to memory and additional moves.  It returns the
   result move list.  */
static move_t
modify_move_list (move_t list)
{
  int i, n, nregs, hard_regno;
  ira_allocno_t to, from;
  move_t move, new_move, set_move, first, last;

  if (list == NULL)
    return NULL;
  /* Create move deps.  */
  curr_tick++;
  /* First pass: for every hard register, remember the last move in
     LIST whose destination occupies it.  */
  for (move = list; move != NULL; move = move->next)
    {
      to = move->to;
      if ((hard_regno = ALLOCNO_HARD_REGNO (to)) < 0)
	continue;
      nregs = hard_regno_nregs (hard_regno, ALLOCNO_MODE (to));
      for (i = 0; i < nregs; i++)
	{
	  hard_regno_last_set[hard_regno + i] = move;
	  hard_regno_last_set_check[hard_regno + i] = curr_tick;
	}
    }
  /* Second pass: a move depends on every move that overwrites a hard
     register occupied by its source (unless that move writes the same
     pseudo, in which case there is no conflict).  */
  for (move = list; move != NULL; move = move->next)
    {
      from = move->from;
      to = move->to;
      if ((hard_regno = ALLOCNO_HARD_REGNO (from)) >= 0)
	{
	  nregs = hard_regno_nregs (hard_regno, ALLOCNO_MODE (from));
	  /* Count dependencies first to size the array ...  */
	  for (n = i = 0; i < nregs; i++)
	    if (hard_regno_last_set_check[hard_regno + i] == curr_tick
		&& (ALLOCNO_REGNO (hard_regno_last_set[hard_regno + i]->to)
		    != ALLOCNO_REGNO (from)))
	      n++;
	  move->deps = (move_t *) ira_allocate (n * sizeof (move_t));
	  /* ... then fill it with the same filter.  */
	  for (n = i = 0; i < nregs; i++)
	    if (hard_regno_last_set_check[hard_regno + i] == curr_tick
		&& (ALLOCNO_REGNO (hard_regno_last_set[hard_regno + i]->to)
		    != ALLOCNO_REGNO (from)))
	      move->deps[n++] = hard_regno_last_set[hard_regno + i];
	  move->deps_num = n;
	}
    }
  /* Topological sorting:  */
  move_vec.truncate (0);
  for (move = list; move != NULL; move = move->next)
    traverse_moves (move);
  last = NULL;
  /* Relink the list in reverse push order so each move follows the
     moves it depends on.  */
  for (i = (int) move_vec.length () - 1; i >= 0; i--)
    {
      move = move_vec[i];
      move->next = NULL;
      if (last != NULL)
	last->next = move;
      last = move;
    }
  first = move_vec.last ();
  /* Removing cycles:  */
  curr_tick++;
  move_vec.truncate (0);
  for (move = first; move != NULL; move = move->next)
    {
      from = move->from;
      to = move->to;
      /* A cycle remains if this move's source hard register is still
	 going to be written by a later (not yet retired) move.  Break
	 it by diverting that move into a fresh memory-assigned
	 allocno.  */
      if ((hard_regno = ALLOCNO_HARD_REGNO (from)) >= 0)
	{
	  nregs = hard_regno_nregs (hard_regno, ALLOCNO_MODE (from));
	  for (i = 0; i < nregs; i++)
	    if (hard_regno_last_set_check[hard_regno + i] == curr_tick
		&& ALLOCNO_HARD_REGNO
		(hard_regno_last_set[hard_regno + i]->to) >= 0)
	      {
		int n, j;
		ira_allocno_t new_allocno;

		set_move = hard_regno_last_set[hard_regno + i];
		/* It does not matter what loop_tree_node (of TO or
		   FROM) to use for the new allocno because of
		   subsequent IRA internal representation
		   flattening.  */
		new_allocno
		  = create_new_allocno (ALLOCNO_REGNO (set_move->to),
					ALLOCNO_LOOP_TREE_NODE (set_move->to));
		ALLOCNO_MODE (new_allocno) = ALLOCNO_MODE (set_move->to);
		ira_set_allocno_class (new_allocno,
				       ALLOCNO_CLASS (set_move->to));
		ira_create_allocno_objects (new_allocno);
		/* HARD_REGNO of -1 means the temporary lives in
		   memory, which is what breaks the register cycle.  */
		ALLOCNO_ASSIGNED_P (new_allocno) = true;
		ALLOCNO_HARD_REGNO (new_allocno) = -1;
		ALLOCNO_EMIT_DATA (new_allocno)->reg
		  = ira_create_new_reg (allocno_emit_reg (set_move->to));

		/* Make it possibly conflicting with all earlier
		   created allocnos.  Cases where temporary allocnos
		   created to remove the cycles are quite rare.  */
		n = ALLOCNO_NUM_OBJECTS (new_allocno);
		gcc_assert (n == ALLOCNO_NUM_OBJECTS (set_move->to));
		for (j = 0; j < n; j++)
		  {
		    ira_object_t new_obj = ALLOCNO_OBJECT (new_allocno, j);

		    OBJECT_MIN (new_obj) = 0;
		    OBJECT_MAX (new_obj) = ira_objects_num - 1;
		  }

		new_move = create_move (set_move->to, new_allocno);
		set_move->to = new_allocno;
		move_vec.safe_push (new_move);
		ira_move_loops_num++;
		if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL)
		  fprintf (ira_dump_file,
			   " Creating temporary allocno a%dr%d\n",
			   ALLOCNO_NUM (new_allocno),
			   REGNO (allocno_emit_reg (new_allocno)));
	      }
	}
      if ((hard_regno = ALLOCNO_HARD_REGNO (to)) < 0)
	continue;
      nregs = hard_regno_nregs (hard_regno, ALLOCNO_MODE (to));
      for (i = 0; i < nregs; i++)
	{
	  hard_regno_last_set[hard_regno + i] = move;
	  hard_regno_last_set_check[hard_regno + i] = curr_tick;
	}
    }
  /* Append the cycle-breaking moves collected above to the end of the
     resulting list.  */
  for (i = (int) move_vec.length () - 1; i >= 0; i--)
    {
      move = move_vec[i];
      move->next = NULL;
      last->next = move;
      last = move;
    }
  return first;
}
905*38fd1498Szrj
906*38fd1498Szrj /* Generate RTX move insns from the move list LIST. This updates
907*38fd1498Szrj allocation cost using move execution frequency FREQ. */
908*38fd1498Szrj static rtx_insn *
emit_move_list(move_t list,int freq)909*38fd1498Szrj emit_move_list (move_t list, int freq)
910*38fd1498Szrj {
911*38fd1498Szrj rtx to, from, dest;
912*38fd1498Szrj int to_regno, from_regno, cost, regno;
913*38fd1498Szrj rtx_insn *result, *insn;
914*38fd1498Szrj rtx set;
915*38fd1498Szrj machine_mode mode;
916*38fd1498Szrj enum reg_class aclass;
917*38fd1498Szrj
918*38fd1498Szrj grow_reg_equivs ();
919*38fd1498Szrj start_sequence ();
920*38fd1498Szrj for (; list != NULL; list = list->next)
921*38fd1498Szrj {
922*38fd1498Szrj start_sequence ();
923*38fd1498Szrj to = allocno_emit_reg (list->to);
924*38fd1498Szrj to_regno = REGNO (to);
925*38fd1498Szrj from = allocno_emit_reg (list->from);
926*38fd1498Szrj from_regno = REGNO (from);
927*38fd1498Szrj emit_move_insn (to, from);
928*38fd1498Szrj list->insn = get_insns ();
929*38fd1498Szrj end_sequence ();
930*38fd1498Szrj for (insn = list->insn; insn != NULL_RTX; insn = NEXT_INSN (insn))
931*38fd1498Szrj {
932*38fd1498Szrj /* The reload needs to have set up insn codes. If the
933*38fd1498Szrj reload sets up insn codes by itself, it may fail because
934*38fd1498Szrj insns will have hard registers instead of pseudos and
935*38fd1498Szrj there may be no machine insn with given hard
936*38fd1498Szrj registers. */
937*38fd1498Szrj recog_memoized (insn);
938*38fd1498Szrj /* Add insn to equiv init insn list if it is necessary.
939*38fd1498Szrj Otherwise reload will not remove this insn if it decides
940*38fd1498Szrj to use the equivalence. */
941*38fd1498Szrj if ((set = single_set (insn)) != NULL_RTX)
942*38fd1498Szrj {
943*38fd1498Szrj dest = SET_DEST (set);
944*38fd1498Szrj if (GET_CODE (dest) == SUBREG)
945*38fd1498Szrj dest = SUBREG_REG (dest);
946*38fd1498Szrj ira_assert (REG_P (dest));
947*38fd1498Szrj regno = REGNO (dest);
948*38fd1498Szrj if (regno >= ira_reg_equiv_len
949*38fd1498Szrj || (ira_reg_equiv[regno].invariant == NULL_RTX
950*38fd1498Szrj && ira_reg_equiv[regno].constant == NULL_RTX))
951*38fd1498Szrj continue; /* regno has no equivalence. */
952*38fd1498Szrj ira_assert ((int) reg_equivs->length () > regno);
953*38fd1498Szrj reg_equiv_init (regno)
954*38fd1498Szrj = gen_rtx_INSN_LIST (VOIDmode, insn, reg_equiv_init (regno));
955*38fd1498Szrj }
956*38fd1498Szrj }
957*38fd1498Szrj if (ira_use_lra_p)
958*38fd1498Szrj ira_update_equiv_info_by_shuffle_insn (to_regno, from_regno, list->insn);
959*38fd1498Szrj emit_insn (list->insn);
960*38fd1498Szrj mode = ALLOCNO_MODE (list->to);
961*38fd1498Szrj aclass = ALLOCNO_CLASS (list->to);
962*38fd1498Szrj cost = 0;
963*38fd1498Szrj if (ALLOCNO_HARD_REGNO (list->to) < 0)
964*38fd1498Szrj {
965*38fd1498Szrj if (ALLOCNO_HARD_REGNO (list->from) >= 0)
966*38fd1498Szrj {
967*38fd1498Szrj cost = ira_memory_move_cost[mode][aclass][0] * freq;
968*38fd1498Szrj ira_store_cost += cost;
969*38fd1498Szrj }
970*38fd1498Szrj }
971*38fd1498Szrj else if (ALLOCNO_HARD_REGNO (list->from) < 0)
972*38fd1498Szrj {
973*38fd1498Szrj if (ALLOCNO_HARD_REGNO (list->to) >= 0)
974*38fd1498Szrj {
975*38fd1498Szrj cost = ira_memory_move_cost[mode][aclass][0] * freq;
976*38fd1498Szrj ira_load_cost += cost;
977*38fd1498Szrj }
978*38fd1498Szrj }
979*38fd1498Szrj else
980*38fd1498Szrj {
981*38fd1498Szrj ira_init_register_move_cost_if_necessary (mode);
982*38fd1498Szrj cost = ira_register_move_cost[mode][aclass][aclass] * freq;
983*38fd1498Szrj ira_shuffle_cost += cost;
984*38fd1498Szrj }
985*38fd1498Szrj ira_overall_cost += cost;
986*38fd1498Szrj }
987*38fd1498Szrj result = get_insns ();
988*38fd1498Szrj end_sequence ();
989*38fd1498Szrj return result;
990*38fd1498Szrj }
991*38fd1498Szrj
/* Generate RTX move insns from move lists attached to basic blocks
   and edges.  */
static void
emit_moves (void)
{
  basic_block bb;
  edge_iterator ei;
  edge e;
  rtx_insn *insns, *tmp;

  FOR_EACH_BB_FN (bb, cfun)
    {
      if (at_bb_start[bb->index] != NULL)
	{
	  at_bb_start[bb->index] = modify_move_list (at_bb_start[bb->index]);
	  insns = emit_move_list (at_bb_start[bb->index],
				  REG_FREQ_FROM_BB (bb));
	  /* Skip past the block's label and basic-block note (when
	     present) to find where the moves must be inserted.  */
	  tmp = BB_HEAD (bb);
	  if (LABEL_P (tmp))
	    tmp = NEXT_INSN (tmp);
	  if (NOTE_INSN_BASIC_BLOCK_P (tmp))
	    tmp = NEXT_INSN (tmp);
	  if (tmp == BB_HEAD (bb))
	    emit_insn_before (insns, tmp);
	  else if (tmp != NULL_RTX)
	    emit_insn_after (insns, PREV_INSN (tmp));
	  else
	    emit_insn_after (insns, get_last_insn ());
	}

      if (at_bb_end[bb->index] != NULL)
	{
	  at_bb_end[bb->index] = modify_move_list (at_bb_end[bb->index]);
	  insns = emit_move_list (at_bb_end[bb->index], REG_FREQ_FROM_BB (bb));
	  /* unify_moves only places a list at the BB end when the
	     block does not end in a control flow insn.  */
	  ira_assert (! control_flow_insn_p (BB_END (bb)));
	  emit_insn_after (insns, BB_END (bb));
	}

      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  if (e->aux == NULL)
	    continue;
	  ira_assert ((e->flags & EDGE_ABNORMAL) == 0
		      || ! EDGE_CRITICAL_P (e));
	  e->aux = modify_move_list ((move_t) e->aux);
	  insert_insn_on_edge
	    (emit_move_list ((move_t) e->aux,
			     REG_FREQ_FROM_EDGE_FREQ (EDGE_FREQUENCY (e))),
	     e);
	  /* Inserting on a non-fallthrough edge will require an extra
	     jump.  */
	  if (e->src->next_bb != e->dest)
	    ira_additional_jumps_num++;
	}
    }
}
1046*38fd1498Szrj
1047*38fd1498Szrj /* Update costs of A and corresponding allocnos on upper levels on the
1048*38fd1498Szrj loop tree from reading (if READ_P) or writing A on an execution
1049*38fd1498Szrj path with FREQ. */
1050*38fd1498Szrj static void
update_costs(ira_allocno_t a,bool read_p,int freq)1051*38fd1498Szrj update_costs (ira_allocno_t a, bool read_p, int freq)
1052*38fd1498Szrj {
1053*38fd1498Szrj ira_loop_tree_node_t parent;
1054*38fd1498Szrj
1055*38fd1498Szrj for (;;)
1056*38fd1498Szrj {
1057*38fd1498Szrj ALLOCNO_NREFS (a)++;
1058*38fd1498Szrj ALLOCNO_FREQ (a) += freq;
1059*38fd1498Szrj ALLOCNO_MEMORY_COST (a)
1060*38fd1498Szrj += (ira_memory_move_cost[ALLOCNO_MODE (a)][ALLOCNO_CLASS (a)]
1061*38fd1498Szrj [read_p ? 1 : 0] * freq);
1062*38fd1498Szrj if (ALLOCNO_CAP (a) != NULL)
1063*38fd1498Szrj a = ALLOCNO_CAP (a);
1064*38fd1498Szrj else if ((parent = ALLOCNO_LOOP_TREE_NODE (a)->parent) == NULL
1065*38fd1498Szrj || (a = parent->regno_allocno_map[ALLOCNO_REGNO (a)]) == NULL)
1066*38fd1498Szrj break;
1067*38fd1498Szrj }
1068*38fd1498Szrj }
1069*38fd1498Szrj
/* Process moves from LIST with execution FREQ to add ranges, copies,
   and modify costs for allocnos involved in the moves.  All regnos
   living through the list is in LIVE_THROUGH, and the loop tree node
   used to find corresponding allocnos is NODE.  */
static void
add_range_and_copies_from_move_list (move_t list, ira_loop_tree_node_t node,
				     bitmap live_through, int freq)
{
  int start, n;
  unsigned int regno;
  move_t move;
  ira_allocno_t a;
  ira_copy_t cp;
  live_range_t r;
  bitmap_iterator bi;
  HARD_REG_SET hard_regs_live;

  if (list == NULL)
    return;
  /* N counts the pseudos living through the list; it sizes newly
     allocated conflict arrays below.  */
  n = 0;
  EXECUTE_IF_SET_IN_BITMAP (live_through, FIRST_PSEUDO_REGISTER, regno, bi)
    n++;
  REG_SET_TO_HARD_REG_SET (hard_regs_live, live_through);
  /* This is a trick to guarantee that new ranges is not merged with
     the old ones.  */
  ira_max_point++;
  start = ira_max_point;
  for (move = list; move != NULL; move = move->next)
    {
      ira_allocno_t from = move->from;
      ira_allocno_t to = move->to;
      int nr, i;

      /* Allocnos named in the moves are handled explicitly; remove
	 them from the live-through set.  */
      bitmap_clear_bit (live_through, ALLOCNO_REGNO (from));
      bitmap_clear_bit (live_through, ALLOCNO_REGNO (to));

      nr = ALLOCNO_NUM_OBJECTS (to);
      for (i = 0; i < nr; i++)
	{
	  ira_object_t to_obj = ALLOCNO_OBJECT (to, i);
	  if (OBJECT_CONFLICT_ARRAY (to_obj) == NULL)
	    {
	      if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL)
		fprintf (ira_dump_file, " Allocate conflicts for a%dr%d\n",
			 ALLOCNO_NUM (to), REGNO (allocno_emit_reg (to)));
	      ira_allocate_object_conflicts (to_obj, n);
	    }
	}
      ior_hard_reg_conflicts (from, &hard_regs_live);
      ior_hard_reg_conflicts (to, &hard_regs_live);

      /* The move reads FROM and writes TO.  */
      update_costs (from, true, freq);
      update_costs (to, false, freq);
      cp = ira_add_allocno_copy (from, to, freq, false, move->insn, NULL);
      if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL)
	fprintf (ira_dump_file, " Adding cp%d:a%dr%d-a%dr%d\n",
		 cp->num, ALLOCNO_NUM (cp->first),
		 REGNO (allocno_emit_reg (cp->first)),
		 ALLOCNO_NUM (cp->second),
		 REGNO (allocno_emit_reg (cp->second)));

      /* Extend (or open) FROM's live ranges up to the current
	 point.  */
      nr = ALLOCNO_NUM_OBJECTS (from);
      for (i = 0; i < nr; i++)
	{
	  ira_object_t from_obj = ALLOCNO_OBJECT (from, i);
	  r = OBJECT_LIVE_RANGES (from_obj);
	  if (r == NULL || r->finish >= 0)
	    {
	      ira_add_live_range_to_object (from_obj, start, ira_max_point);
	      if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL)
		fprintf (ira_dump_file,
			 " Adding range [%d..%d] to allocno a%dr%d\n",
			 start, ira_max_point, ALLOCNO_NUM (from),
			 REGNO (allocno_emit_reg (from)));
	    }
	  else
	    {
	      /* The latest range is still open (finish < 0): close it
		 at the current point.  */
	      r->finish = ira_max_point;
	      if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL)
		fprintf (ira_dump_file,
			 " Adding range [%d..%d] to allocno a%dr%d\n",
			 r->start, ira_max_point, ALLOCNO_NUM (from),
			 REGNO (allocno_emit_reg (from)));
	    }
	}
      ira_max_point++;
      /* Open a range for TO; it is closed by a later move that reads
	 it, or by the fixup loop below.  */
      nr = ALLOCNO_NUM_OBJECTS (to);
      for (i = 0; i < nr; i++)
	{
	  ira_object_t to_obj = ALLOCNO_OBJECT (to, i);
	  ira_add_live_range_to_object (to_obj, ira_max_point, -1);
	}
      ira_max_point++;
    }
  /* Close any destination ranges left open by the loop above.  */
  for (move = list; move != NULL; move = move->next)
    {
      int nr, i;
      nr = ALLOCNO_NUM_OBJECTS (move->to);
      for (i = 0; i < nr; i++)
	{
	  ira_object_t to_obj = ALLOCNO_OBJECT (move->to, i);
	  r = OBJECT_LIVE_RANGES (to_obj);
	  if (r->finish < 0)
	    {
	      r->finish = ira_max_point - 1;
	      if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL)
		fprintf (ira_dump_file,
			 " Adding range [%d..%d] to allocno a%dr%d\n",
			 r->start, r->finish, ALLOCNO_NUM (move->to),
			 REGNO (allocno_emit_reg (move->to)));
	    }
	}
    }
  /* Pseudos that live through the whole move list get one range
     spanning all of it.  */
  EXECUTE_IF_SET_IN_BITMAP (live_through, FIRST_PSEUDO_REGISTER, regno, bi)
    {
      ira_allocno_t to;
      int nr, i;

      a = node->regno_allocno_map[regno];
      if ((to = ALLOCNO_EMIT_DATA (a)->mem_optimized_dest) != NULL)
	a = to;
      nr = ALLOCNO_NUM_OBJECTS (a);
      for (i = 0; i < nr; i++)
	{
	  ira_object_t obj = ALLOCNO_OBJECT (a, i);
	  ira_add_live_range_to_object (obj, start, ira_max_point - 1);
	}
      if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL)
	fprintf
	  (ira_dump_file,
	   " Adding range [%d..%d] to live through %s allocno a%dr%d\n",
	   start, ira_max_point - 1,
	   to != NULL ? "upper level" : "",
	   ALLOCNO_NUM (a), REGNO (allocno_emit_reg (a)));
    }
}
1206*38fd1498Szrj
1207*38fd1498Szrj /* Process all move list to add ranges, conflicts, copies, and modify
1208*38fd1498Szrj costs for allocnos involved in the moves. */
1209*38fd1498Szrj static void
add_ranges_and_copies(void)1210*38fd1498Szrj add_ranges_and_copies (void)
1211*38fd1498Szrj {
1212*38fd1498Szrj basic_block bb;
1213*38fd1498Szrj edge_iterator ei;
1214*38fd1498Szrj edge e;
1215*38fd1498Szrj ira_loop_tree_node_t node;
1216*38fd1498Szrj bitmap live_through;
1217*38fd1498Szrj
1218*38fd1498Szrj live_through = ira_allocate_bitmap ();
1219*38fd1498Szrj FOR_EACH_BB_FN (bb, cfun)
1220*38fd1498Szrj {
1221*38fd1498Szrj /* It does not matter what loop_tree_node (of source or
1222*38fd1498Szrj destination block) to use for searching allocnos by their
1223*38fd1498Szrj regnos because of subsequent IR flattening. */
1224*38fd1498Szrj node = IRA_BB_NODE (bb)->parent;
1225*38fd1498Szrj bitmap_copy (live_through, df_get_live_in (bb));
1226*38fd1498Szrj add_range_and_copies_from_move_list
1227*38fd1498Szrj (at_bb_start[bb->index], node, live_through, REG_FREQ_FROM_BB (bb));
1228*38fd1498Szrj bitmap_copy (live_through, df_get_live_out (bb));
1229*38fd1498Szrj add_range_and_copies_from_move_list
1230*38fd1498Szrj (at_bb_end[bb->index], node, live_through, REG_FREQ_FROM_BB (bb));
1231*38fd1498Szrj FOR_EACH_EDGE (e, ei, bb->succs)
1232*38fd1498Szrj {
1233*38fd1498Szrj bitmap_and (live_through,
1234*38fd1498Szrj df_get_live_in (e->dest), df_get_live_out (bb));
1235*38fd1498Szrj add_range_and_copies_from_move_list
1236*38fd1498Szrj ((move_t) e->aux, node, live_through,
1237*38fd1498Szrj REG_FREQ_FROM_EDGE_FREQ (EDGE_FREQUENCY (e)));
1238*38fd1498Szrj }
1239*38fd1498Szrj }
1240*38fd1498Szrj ira_free_bitmap (live_through);
1241*38fd1498Szrj }
1242*38fd1498Szrj
/* The entry function changes code and generates shuffling allocnos on
   region borders for the regional (LOOPS_P is TRUE in this case)
   register allocation.  */
void
ira_emit (bool loops_p)
{
  basic_block bb;
  rtx_insn *insn;
  edge_iterator ei;
  edge e;
  ira_allocno_t a;
  ira_allocno_iterator ai;
  size_t sz;

  /* Start every allocno from the original pseudo of its regno; the
     loop-tree traversal below renames where needed.  */
  FOR_EACH_ALLOCNO (a, ai)
    ALLOCNO_EMIT_DATA (a)->reg = regno_reg_rtx[ALLOCNO_REGNO (a)];
  /* With a single region no border moves are needed.  */
  if (! loops_p)
    return;
  sz = sizeof (move_t) * last_basic_block_for_fn (cfun);
  at_bb_start = (move_t *) ira_allocate (sz);
  memset (at_bb_start, 0, sz);
  at_bb_end = (move_t *) ira_allocate (sz);
  memset (at_bb_end, 0, sz);
  local_allocno_bitmap = ira_allocate_bitmap ();
  used_regno_bitmap = ira_allocate_bitmap ();
  renamed_regno_bitmap = ira_allocate_bitmap ();
  max_regno_before_changing = max_reg_num ();
  /* Rename pseudos per region where allocnos got different hard
     registers.  */
  ira_traverse_loop_tree (true, ira_loop_tree_root, change_loop, NULL);
  set_allocno_somewhere_renamed_p ();
  ira_free_bitmap (used_regno_bitmap);
  ira_free_bitmap (renamed_regno_bitmap);
  ira_free_bitmap (local_allocno_bitmap);
  setup_entered_from_non_parent_p ();
  /* Attach move lists to edges on region borders.  */
  FOR_EACH_BB_FN (bb, cfun)
    {
      at_bb_start[bb->index] = NULL;
      at_bb_end[bb->index] = NULL;
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
	  generate_edge_moves (e);
    }
  allocno_last_set
    = (move_t *) ira_allocate (sizeof (move_t) * max_reg_num ());
  allocno_last_set_check
    = (int *) ira_allocate (sizeof (int) * max_reg_num ());
  memset (allocno_last_set_check, 0, sizeof (int) * max_reg_num ());
  memset (hard_regno_last_set_check, 0, sizeof (hard_regno_last_set_check));
  curr_tick = 0;
  /* Hoist duplicated edge move lists to block starts, then block
     ends.  */
  FOR_EACH_BB_FN (bb, cfun)
    unify_moves (bb, true);
  FOR_EACH_BB_FN (bb, cfun)
    unify_moves (bb, false);
  move_vec.create (ira_allocnos_num);
  emit_moves ();
  add_ranges_and_copies ();
  /* Clean up: */
  FOR_EACH_BB_FN (bb, cfun)
    {
      free_move_list (at_bb_start[bb->index]);
      free_move_list (at_bb_end[bb->index]);
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  free_move_list ((move_t) e->aux);
	  e->aux = NULL;
	}
    }
  move_vec.release ();
  ira_free (allocno_last_set_check);
  ira_free (allocno_last_set);
  commit_edge_insertions ();
  /* Fix insn codes.  It is necessary to do it before reload because
     reload assumes initial insn codes defined.  The insn codes can be
     invalidated by CFG infrastructure for example in jump
     redirection.  */
  FOR_EACH_BB_FN (bb, cfun)
    FOR_BB_INSNS_REVERSE (bb, insn)
      if (INSN_P (insn))
	recog_memoized (insn);
  ira_free (at_bb_end);
  ira_free (at_bb_start);
}
1324