1*38fd1498Szrj /* Perform simple optimizations to clean up the result of reload.
2*38fd1498Szrj Copyright (C) 1987-2018 Free Software Foundation, Inc.
3*38fd1498Szrj
4*38fd1498Szrj This file is part of GCC.
5*38fd1498Szrj
6*38fd1498Szrj GCC is free software; you can redistribute it and/or modify it under
7*38fd1498Szrj the terms of the GNU General Public License as published by the Free
8*38fd1498Szrj Software Foundation; either version 3, or (at your option) any later
9*38fd1498Szrj version.
10*38fd1498Szrj
11*38fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12*38fd1498Szrj WARRANTY; without even the implied warranty of MERCHANTABILITY or
13*38fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14*38fd1498Szrj for more details.
15*38fd1498Szrj
16*38fd1498Szrj You should have received a copy of the GNU General Public License
17*38fd1498Szrj along with GCC; see the file COPYING3. If not see
18*38fd1498Szrj <http://www.gnu.org/licenses/>. */
19*38fd1498Szrj
20*38fd1498Szrj #include "config.h"
21*38fd1498Szrj #include "system.h"
22*38fd1498Szrj #include "coretypes.h"
23*38fd1498Szrj #include "backend.h"
24*38fd1498Szrj #include "target.h"
25*38fd1498Szrj #include "rtl.h"
26*38fd1498Szrj #include "tree.h"
27*38fd1498Szrj #include "predict.h"
28*38fd1498Szrj #include "df.h"
29*38fd1498Szrj #include "memmodel.h"
30*38fd1498Szrj #include "tm_p.h"
31*38fd1498Szrj #include "optabs.h"
32*38fd1498Szrj #include "regs.h"
33*38fd1498Szrj #include "emit-rtl.h"
34*38fd1498Szrj #include "recog.h"
35*38fd1498Szrj
36*38fd1498Szrj #include "cfgrtl.h"
37*38fd1498Szrj #include "cfgbuild.h"
38*38fd1498Szrj #include "cfgcleanup.h"
39*38fd1498Szrj #include "reload.h"
40*38fd1498Szrj #include "cselib.h"
41*38fd1498Szrj #include "tree-pass.h"
42*38fd1498Szrj #include "dbgcnt.h"
43*38fd1498Szrj
44*38fd1498Szrj static int reload_cse_noop_set_p (rtx);
45*38fd1498Szrj static bool reload_cse_simplify (rtx_insn *, rtx);
46*38fd1498Szrj static void reload_cse_regs_1 (void);
47*38fd1498Szrj static int reload_cse_simplify_set (rtx, rtx_insn *);
48*38fd1498Szrj static int reload_cse_simplify_operands (rtx_insn *, rtx);
49*38fd1498Szrj
50*38fd1498Szrj static void reload_combine (void);
51*38fd1498Szrj static void reload_combine_note_use (rtx *, rtx_insn *, int, rtx);
52*38fd1498Szrj static void reload_combine_note_store (rtx, const_rtx, void *);
53*38fd1498Szrj
54*38fd1498Szrj static bool reload_cse_move2add (rtx_insn *);
55*38fd1498Szrj static void move2add_note_store (rtx, const_rtx, void *);
56*38fd1498Szrj
57*38fd1498Szrj /* Call cse / combine like post-reload optimization phases.
58*38fd1498Szrj FIRST is the first instruction. */
59*38fd1498Szrj
60*38fd1498Szrj static void
reload_cse_regs(rtx_insn * first ATTRIBUTE_UNUSED)61*38fd1498Szrj reload_cse_regs (rtx_insn *first ATTRIBUTE_UNUSED)
62*38fd1498Szrj {
63*38fd1498Szrj bool moves_converted;
64*38fd1498Szrj reload_cse_regs_1 ();
65*38fd1498Szrj reload_combine ();
66*38fd1498Szrj moves_converted = reload_cse_move2add (first);
67*38fd1498Szrj if (flag_expensive_optimizations)
68*38fd1498Szrj {
69*38fd1498Szrj if (moves_converted)
70*38fd1498Szrj reload_combine ();
71*38fd1498Szrj reload_cse_regs_1 ();
72*38fd1498Szrj }
73*38fd1498Szrj }
74*38fd1498Szrj
75*38fd1498Szrj /* See whether a single set SET is a noop. */
76*38fd1498Szrj static int
reload_cse_noop_set_p(rtx set)77*38fd1498Szrj reload_cse_noop_set_p (rtx set)
78*38fd1498Szrj {
79*38fd1498Szrj if (cselib_reg_set_mode (SET_DEST (set)) != GET_MODE (SET_DEST (set)))
80*38fd1498Szrj return 0;
81*38fd1498Szrj
82*38fd1498Szrj return rtx_equal_for_cselib_p (SET_DEST (set), SET_SRC (set));
83*38fd1498Szrj }
84*38fd1498Szrj
/* Try to simplify INSN.  Return true if the CFG may have changed.
   TESTREG is a scratch register rtx handed down to
   reload_cse_simplify_operands.  */
static bool
reload_cse_simplify (rtx_insn *insn, rtx testreg)
{
  rtx body = PATTERN (insn);
  basic_block insn_bb = BLOCK_FOR_INSN (insn);
  /* Record the successor count up front; if deleting INSN removes an
     edge, the count at the end will differ and we report a CFG change.  */
  unsigned insn_bb_succs = EDGE_COUNT (insn_bb->succs);

  /* If NO_FUNCTION_CSE has been set by the target, then we should not try
     to cse function calls.  */
  if (NO_FUNCTION_CSE && CALL_P (insn))
    return false;

  if (GET_CODE (body) == SET)
    {
      int count = 0;

      /* Simplify even if we may think it is a no-op.
	 We may think a memory load of a value smaller than WORD_SIZE
	 is redundant because we haven't taken into account possible
	 implicit extension.  reload_cse_simplify_set() will bring
	 this out, so it's safer to simplify before we delete.  */
      count += reload_cse_simplify_set (body, insn);

      if (!count && reload_cse_noop_set_p (body))
	{
	  /* Only delete if any auto-inc side effects could be moved
	     out of the insn (check_for_inc_dec handles REG_INC).  */
	  if (check_for_inc_dec (insn))
	    delete_insn_and_edges (insn);
	  /* We're done with this insn.  */
	  goto done;
	}

      if (count > 0)
	apply_change_group ();
      else
	reload_cse_simplify_operands (insn, testreg);
    }
  else if (GET_CODE (body) == PARALLEL)
    {
      int i;
      int count = 0;
      rtx value = NULL_RTX;

      /* Registers mentioned in the clobber list for an asm cannot be reused
	 within the body of the asm.  Invalidate those registers now so that
	 we don't try to substitute values for them.  */
      if (asm_noperands (body) >= 0)
	{
	  for (i = XVECLEN (body, 0) - 1; i >= 0; --i)
	    {
	      rtx part = XVECEXP (body, 0, i);
	      if (GET_CODE (part) == CLOBBER && REG_P (XEXP (part, 0)))
		cselib_invalidate_rtx (XEXP (part, 0));
	    }
	}

      /* If every action in a PARALLEL is a noop, we can delete
	 the entire PARALLEL.  */
      for (i = XVECLEN (body, 0) - 1; i >= 0; --i)
	{
	  rtx part = XVECEXP (body, 0, i);
	  if (GET_CODE (part) == SET)
	    {
	      if (! reload_cse_noop_set_p (part))
		break;
	      /* Allow at most one no-op set of a function value
		 register within the PARALLEL.  */
	      if (REG_P (SET_DEST (part))
		  && REG_FUNCTION_VALUE_P (SET_DEST (part)))
		{
		  if (value)
		    break;
		  value = SET_DEST (part);
		}
	    }
	  else if (GET_CODE (part) != CLOBBER
		   && GET_CODE (part) != USE)
	    break;
	}

      /* I < 0 means the scan above fell off the end, i.e. every part
	 was a no-op set, a CLOBBER or a USE.  */
      if (i < 0)
	{
	  if (check_for_inc_dec (insn))
	    delete_insn_and_edges (insn);
	  /* We're done with this insn.  */
	  goto done;
	}

      /* It's not a no-op, but we can try to simplify it.  */
      for (i = XVECLEN (body, 0) - 1; i >= 0; --i)
	if (GET_CODE (XVECEXP (body, 0, i)) == SET)
	  count += reload_cse_simplify_set (XVECEXP (body, 0, i), insn);

      if (count > 0)
	apply_change_group ();
      else
	reload_cse_simplify_operands (insn, testreg);
    }

 done:
  return (EDGE_COUNT (insn_bb->succs) != insn_bb_succs);
}
185*38fd1498Szrj
186*38fd1498Szrj /* Do a very simple CSE pass over the hard registers.
187*38fd1498Szrj
188*38fd1498Szrj This function detects no-op moves where we happened to assign two
189*38fd1498Szrj different pseudo-registers to the same hard register, and then
190*38fd1498Szrj copied one to the other. Reload will generate a useless
191*38fd1498Szrj instruction copying a register to itself.
192*38fd1498Szrj
193*38fd1498Szrj This function also detects cases where we load a value from memory
194*38fd1498Szrj into two different registers, and (if memory is more expensive than
195*38fd1498Szrj registers) changes it to simply copy the first register into the
196*38fd1498Szrj second register.
197*38fd1498Szrj
198*38fd1498Szrj Another optimization is performed that scans the operands of each
199*38fd1498Szrj instruction to see whether the value is already available in a
200*38fd1498Szrj hard register. It then replaces the operand with the hard register
201*38fd1498Szrj if possible, much like an optional reload would. */
202*38fd1498Szrj
203*38fd1498Szrj static void
reload_cse_regs_1(void)204*38fd1498Szrj reload_cse_regs_1 (void)
205*38fd1498Szrj {
206*38fd1498Szrj bool cfg_changed = false;
207*38fd1498Szrj basic_block bb;
208*38fd1498Szrj rtx_insn *insn;
209*38fd1498Szrj rtx testreg = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
210*38fd1498Szrj
211*38fd1498Szrj cselib_init (CSELIB_RECORD_MEMORY);
212*38fd1498Szrj init_alias_analysis ();
213*38fd1498Szrj
214*38fd1498Szrj FOR_EACH_BB_FN (bb, cfun)
215*38fd1498Szrj FOR_BB_INSNS (bb, insn)
216*38fd1498Szrj {
217*38fd1498Szrj if (INSN_P (insn))
218*38fd1498Szrj cfg_changed |= reload_cse_simplify (insn, testreg);
219*38fd1498Szrj
220*38fd1498Szrj cselib_process_insn (insn);
221*38fd1498Szrj }
222*38fd1498Szrj
223*38fd1498Szrj /* Clean up. */
224*38fd1498Szrj end_alias_analysis ();
225*38fd1498Szrj cselib_finish ();
226*38fd1498Szrj if (cfg_changed)
227*38fd1498Szrj cleanup_cfg (0);
228*38fd1498Szrj }
229*38fd1498Szrj
/* Try to simplify a single SET instruction.  SET is the set pattern.
   INSN is the instruction it came from.
   This function only handles one case: if we set a register to a value
   which is not a register, we try to find that value in some other register
   and change the set into a register copy.
   Returns 1 if a (queued, not yet applied) change was made, 0 otherwise.  */

static int
reload_cse_simplify_set (rtx set, rtx_insn *insn)
{
  int did_change = 0;
  int dreg;
  rtx src;
  reg_class_t dclass;
  int old_cost;
  cselib_val *val;
  struct elt_loc_list *l;
  enum rtx_code extend_op = UNKNOWN;
  bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));

  dreg = true_regnum (SET_DEST (set));
  if (dreg < 0)
    return 0;

  src = SET_SRC (set);
  /* Give up on sources with side effects, and on sources that already
     are (or contain) a hard register copy.  */
  if (side_effects_p (src) || true_regnum (src) >= 0)
    return 0;

  dclass = REGNO_REG_CLASS (dreg);

  /* When replacing a memory with a register, we need to honor assumptions
     that combine made wrt the contents of sign bits.  We'll do this by
     generating an extend instruction instead of a reg->reg copy.  Thus
     the destination must be a register that we can widen.  */
  if (MEM_P (src)
      && (extend_op = load_extend_op (GET_MODE (src))) != UNKNOWN
      && !REG_P (SET_DEST (set)))
    return 0;

  val = cselib_lookup (src, GET_MODE (SET_DEST (set)), 0, VOIDmode);
  if (! val)
    return 0;

  /* If memory loads are cheaper than register copies, don't change them.  */
  if (MEM_P (src))
    old_cost = memory_move_cost (GET_MODE (src), dclass, true);
  else if (REG_P (src))
    old_cost = register_move_cost (GET_MODE (src),
				   REGNO_REG_CLASS (REGNO (src)), dclass);
  else
    old_cost = set_src_cost (src, GET_MODE (SET_DEST (set)), speed);

  /* Walk every location cselib knows to hold the same value and pick
     the cheapest viable replacement.  */
  for (l = val->locs; l; l = l->next)
    {
      rtx this_rtx = l->loc;
      int this_cost;

      if (CONSTANT_P (this_rtx) && ! references_value_p (this_rtx, 0))
	{
	  if (extend_op != UNKNOWN)
	    {
	      wide_int result;

	      /* Only scalar integer constants can be extended.  */
	      if (!CONST_SCALAR_INT_P (this_rtx))
		continue;

	      /* Fold the implicit load extension into the constant.  */
	      switch (extend_op)
		{
		case ZERO_EXTEND:
		  result = wide_int::from (rtx_mode_t (this_rtx,
						       GET_MODE (src)),
					   BITS_PER_WORD, UNSIGNED);
		  break;
		case SIGN_EXTEND:
		  result = wide_int::from (rtx_mode_t (this_rtx,
						       GET_MODE (src)),
					   BITS_PER_WORD, SIGNED);
		  break;
		default:
		  gcc_unreachable ();
		}
	      this_rtx = immed_wide_int_const (result, word_mode);
	    }

	  this_cost = set_src_cost (this_rtx, GET_MODE (SET_DEST (set)), speed);
	}
      else if (REG_P (this_rtx))
	{
	  if (extend_op != UNKNOWN)
	    {
	      /* Cost an explicit extension of the equivalent register.  */
	      this_rtx = gen_rtx_fmt_e (extend_op, word_mode, this_rtx);
	      this_cost = set_src_cost (this_rtx, word_mode, speed);
	    }
	  else
	    this_cost = register_move_cost (GET_MODE (this_rtx),
					    REGNO_REG_CLASS (REGNO (this_rtx)),
					    dclass);
	}
      else
	continue;

      /* If equal costs, prefer registers over anything else.  That
	 tends to lead to smaller instructions on some machines.  */
      if (this_cost < old_cost
	  || (this_cost == old_cost
	      && REG_P (this_rtx)
	      && !REG_P (SET_SRC (set))))
	{
	  /* When an extension is involved the destination must be
	     rewritten in word_mode; only legal if the hard register
	     can change to that mode.  */
	  if (extend_op != UNKNOWN
	      && REG_CAN_CHANGE_MODE_P (REGNO (SET_DEST (set)),
					GET_MODE (SET_DEST (set)), word_mode))
	    {
	      rtx wide_dest = gen_rtx_REG (word_mode, REGNO (SET_DEST (set)));
	      ORIGINAL_REGNO (wide_dest) = ORIGINAL_REGNO (SET_DEST (set));
	      validate_change (insn, &SET_DEST (set), wide_dest, 1);
	    }

	  validate_unshare_change (insn, &SET_SRC (set), this_rtx, 1);
	  old_cost = this_cost, did_change = 1;
	}
    }

  return did_change;
}
353*38fd1498Szrj
/* Try to replace operands in INSN with equivalent values that are already
   in registers.  This can be viewed as optional reloading.

   For each non-register operand in the insn, see if any hard regs are
   known to be equivalent to that operand.  Record the alternatives which
   can accept these hard registers.  Among all alternatives, select the
   ones which are better or equal to the one currently matching, where
   "better" is in terms of '?' and '!' constraints.  Among the remaining
   alternatives, select the one which replaces most operands with
   hard registers.

   TESTREG is a scratch REG rtx whose mode/regno are overwritten to test
   candidate hard registers against constraint classes.
   Returns the result of apply_change_group (nonzero on success), or 0
   if nothing could be done.  */

static int
reload_cse_simplify_operands (rtx_insn *insn, rtx testreg)
{
  int i, j;

  /* For each operand, all registers that are equivalent to it.  */
  HARD_REG_SET equiv_regs[MAX_RECOG_OPERANDS];

  const char *constraints[MAX_RECOG_OPERANDS];

  /* Vector recording how bad an alternative is.  */
  int *alternative_reject;
  /* Vector recording how many registers can be introduced by choosing
     this alternative.  */
  int *alternative_nregs;
  /* Array of vectors recording, for each operand and each alternative,
     which hard register to substitute, or -1 if the operand should be
     left as it is.  */
  int *op_alt_regno[MAX_RECOG_OPERANDS];
  /* Array of alternatives, sorted in order of decreasing desirability.  */
  int *alternative_order;

  extract_constrain_insn (insn);

  if (recog_data.n_alternatives == 0 || recog_data.n_operands == 0)
    return 0;

  alternative_reject = XALLOCAVEC (int, recog_data.n_alternatives);
  alternative_nregs = XALLOCAVEC (int, recog_data.n_alternatives);
  alternative_order = XALLOCAVEC (int, recog_data.n_alternatives);
  memset (alternative_reject, 0, recog_data.n_alternatives * sizeof (int));
  memset (alternative_nregs, 0, recog_data.n_alternatives * sizeof (int));

  /* For each operand, find out which regs are equivalent.  */
  for (i = 0; i < recog_data.n_operands; i++)
    {
      cselib_val *v;
      struct elt_loc_list *l;
      rtx op;

      CLEAR_HARD_REG_SET (equiv_regs[i]);

      /* cselib blows up on CODE_LABELs.  Trying to fix that doesn't seem
	 right, so avoid the problem here.  Similarly NOTE_INSN_DELETED_LABEL.
	 Likewise if we have a constant and the insn pattern doesn't tell us
	 the mode we need.  */
      if (LABEL_P (recog_data.operand[i])
	  || (NOTE_P (recog_data.operand[i])
	      && NOTE_KIND (recog_data.operand[i]) == NOTE_INSN_DELETED_LABEL)
	  || (CONSTANT_P (recog_data.operand[i])
	      && recog_data.operand_mode[i] == VOIDmode))
	continue;

      op = recog_data.operand[i];
      /* On targets with implicit load extension we must be careful
	 about replacing a sub-word memory operand with a register.  */
      if (MEM_P (op) && load_extend_op (GET_MODE (op)) != UNKNOWN)
	{
	  rtx set = single_set (insn);

	  /* We might have multiple sets, some of which do implicit
	     extension.  Punt on this for now.  */
	  if (! set)
	    continue;
	  /* If the destination is also a MEM or a STRICT_LOW_PART, no
	     extension applies.
	     Also, if there is an explicit extension, we don't have to
	     worry about an implicit one.  */
	  else if (MEM_P (SET_DEST (set))
		   || GET_CODE (SET_DEST (set)) == STRICT_LOW_PART
		   || GET_CODE (SET_SRC (set)) == ZERO_EXTEND
		   || GET_CODE (SET_SRC (set)) == SIGN_EXTEND)
	    ; /* Continue ordinary processing.  */
	  /* If the register cannot change mode to word_mode, it follows that
	     it cannot have been used in word_mode.  */
	  else if (REG_P (SET_DEST (set))
		   && !REG_CAN_CHANGE_MODE_P (REGNO (SET_DEST (set)),
					      GET_MODE (SET_DEST (set)),
					      word_mode))
	    ; /* Continue ordinary processing.  */
	  /* If this is a straight load, make the extension explicit.  */
	  else if (REG_P (SET_DEST (set))
		   && recog_data.n_operands == 2
		   && SET_SRC (set) == op
		   && SET_DEST (set) == recog_data.operand[1-i])
	    {
	      validate_change (insn, recog_data.operand_loc[i],
			       gen_rtx_fmt_e (load_extend_op (GET_MODE (op)),
					      word_mode, op),
			       1);
	      validate_change (insn, recog_data.operand_loc[1-i],
			       gen_rtx_REG (word_mode, REGNO (SET_DEST (set))),
			       1);
	      if (! apply_change_group ())
		return 0;
	      /* Re-run the whole analysis on the rewritten insn.  */
	      return reload_cse_simplify_operands (insn, testreg);
	    }
	  else
	    /* ??? There might be arithmetic operations with memory that are
	       safe to optimize, but is it worth the trouble?  */
	    continue;
	}

      if (side_effects_p (op))
	continue;
      v = cselib_lookup (op, recog_data.operand_mode[i], 0, VOIDmode);
      if (! v)
	continue;

      for (l = v->locs; l; l = l->next)
	if (REG_P (l->loc))
	  SET_HARD_REG_BIT (equiv_regs[i], REGNO (l->loc));
    }

  alternative_mask preferred = get_preferred_alternatives (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    {
      machine_mode mode;
      int regno;
      const char *p;

      op_alt_regno[i] = XALLOCAVEC (int, recog_data.n_alternatives);
      for (j = 0; j < recog_data.n_alternatives; j++)
	op_alt_regno[i][j] = -1;

      p = constraints[i] = recog_data.constraints[i];
      mode = recog_data.operand_mode[i];

      /* Add the reject values for each alternative given by the constraints
	 for this operand.  */
      j = 0;
      while (*p != '\0')
	{
	  char c = *p++;
	  if (c == ',')
	    j++;
	  else if (c == '?')
	    alternative_reject[j] += 3;
	  else if (c == '!')
	    alternative_reject[j] += 300;
	}

      /* We won't change operands which are already registers.  We
	 also don't want to modify output operands.  */
      regno = true_regnum (recog_data.operand[i]);
      if (regno >= 0
	  || constraints[i][0] == '='
	  || constraints[i][0] == '+')
	continue;

      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
	{
	  enum reg_class rclass = NO_REGS;

	  if (! TEST_HARD_REG_BIT (equiv_regs[i], regno))
	    continue;

	  /* Reuse TESTREG to probe whether REGNO in MODE satisfies
	     each alternative's constraint class.  */
	  set_mode_and_regno (testreg, mode, regno);

	  /* We found a register equal to this operand.  Now look for all
	     alternatives that can accept this register and have not been
	     assigned a register they can use yet.  */
	  j = 0;
	  p = constraints[i];
	  for (;;)
	    {
	      char c = *p;

	      switch (c)
		{
		case 'g':
		  rclass = reg_class_subunion[rclass][GENERAL_REGS];
		  break;

		default:
		  /* Accumulate the union of all register classes the
		     constraint letters of this alternative allow.  */
		  rclass
		    = (reg_class_subunion
		       [rclass]
		       [reg_class_for_constraint (lookup_constraint (p))]);
		  break;

		case ',': case '\0':
		  /* See if REGNO fits this alternative, and set it up as the
		     replacement register if we don't have one for this
		     alternative yet and the operand being replaced is not
		     a cheap CONST_INT.  */
		  if (op_alt_regno[i][j] == -1
		      && TEST_BIT (preferred, j)
		      && reg_fits_class_p (testreg, rclass, 0, mode)
		      && (!CONST_INT_P (recog_data.operand[i])
			  || (set_src_cost (recog_data.operand[i], mode,
					    optimize_bb_for_speed_p
					     (BLOCK_FOR_INSN (insn)))
			      > set_src_cost (testreg, mode,
					      optimize_bb_for_speed_p
					       (BLOCK_FOR_INSN (insn))))))
		    {
		      alternative_nregs[j]++;
		      op_alt_regno[i][j] = regno;
		    }
		  j++;
		  rclass = NO_REGS;
		  break;
		}
	      p += CONSTRAINT_LEN (c, p);

	      if (c == '\0')
		break;
	    }
	}
    }

  /* Record all alternatives which are better or equal to the currently
     matching one in the alternative_order array.  */
  for (i = j = 0; i < recog_data.n_alternatives; i++)
    if (alternative_reject[i] <= alternative_reject[which_alternative])
      alternative_order[j++] = i;
  recog_data.n_alternatives = j;

  /* Sort it.  Given a small number of alternatives, a dumb algorithm
     won't hurt too much.  */
  for (i = 0; i < recog_data.n_alternatives - 1; i++)
    {
      int best = i;
      int best_reject = alternative_reject[alternative_order[i]];
      int best_nregs = alternative_nregs[alternative_order[i]];

      for (j = i + 1; j < recog_data.n_alternatives; j++)
	{
	  int this_reject = alternative_reject[alternative_order[j]];
	  int this_nregs = alternative_nregs[alternative_order[j]];

	  /* Lower reject wins; ties are broken by how many registers
	     the alternative lets us substitute.  */
	  if (this_reject < best_reject
	      || (this_reject == best_reject && this_nregs > best_nregs))
	    {
	      best = j;
	      best_reject = this_reject;
	      best_nregs = this_nregs;
	    }
	}

      std::swap (alternative_order[best], alternative_order[i]);
    }

  /* Substitute the operands as determined by op_alt_regno for the best
     alternative.  */
  j = alternative_order[0];

  for (i = 0; i < recog_data.n_operands; i++)
    {
      machine_mode mode = recog_data.operand_mode[i];
      if (op_alt_regno[i][j] == -1)
	continue;

      validate_change (insn, recog_data.operand_loc[i],
		       gen_rtx_REG (mode, op_alt_regno[i][j]), 1);
    }

  /* Duplicated operands must be rewritten to match their originals.  */
  for (i = recog_data.n_dups - 1; i >= 0; i--)
    {
      int op = recog_data.dup_num[i];
      machine_mode mode = recog_data.operand_mode[op];

      if (op_alt_regno[op][j] == -1)
	continue;

      validate_change (insn, recog_data.dup_loc[i],
		       gen_rtx_REG (mode, op_alt_regno[op][j]), 1);
    }

  return apply_change_group ();
}
635*38fd1498Szrj
636*38fd1498Szrj /* If reload couldn't use reg+reg+offset addressing, try to use reg+reg
637*38fd1498Szrj addressing now.
638*38fd1498Szrj This code might also be useful when reload gave up on reg+reg addressing
639*38fd1498Szrj because of clashes between the return register and INDEX_REG_CLASS. */
640*38fd1498Szrj
641*38fd1498Szrj /* The maximum number of uses of a register we can keep track of to
642*38fd1498Szrj replace them with reg+reg addressing. */
643*38fd1498Szrj #define RELOAD_COMBINE_MAX_USES 16
644*38fd1498Szrj
/* Describes a recorded use of a register.  */
struct reg_use
{
  /* The insn where a register has been used.  */
  rtx_insn *insn;
  /* Points to the memory reference enclosing the use, if any, NULL_RTX
     otherwise.  */
  rtx containing_mem;
  /* Location of the register within INSN.  */
  rtx *usep;
  /* The reverse uid of the insn.  */
  int ruid;
};
658*38fd1498Szrj
/* If the register is used in some unknown fashion, USE_INDEX is negative.
   If it is dead, USE_INDEX is RELOAD_COMBINE_MAX_USES, and STORE_RUID
   indicates where it is first set or clobbered.
   Otherwise, USE_INDEX is the index of the last encountered use of the
   register (which is first among these we have seen since we scan backwards).
   USE_RUID indicates the first encountered, i.e. last, of these uses.
   If ALL_OFFSETS_MATCH is true, all encountered uses were inside a PLUS
   with a constant offset; OFFSET contains this constant in that case.
   STORE_RUID is always meaningful if we only want to use a value in a
   register in a different place: it denotes the next insn in the insn
   stream (i.e. the last encountered) that sets or clobbers the register.
   REAL_STORE_RUID is similar, but clobbers are ignored when updating it.  */
static struct
{
  /* Recorded uses of this register, filled from index USE_INDEX upward.  */
  struct reg_use reg_use[RELOAD_COMBINE_MAX_USES];
  /* Constant offset common to all uses when ALL_OFFSETS_MATCH.  */
  rtx offset;
  int use_index;
  int store_ruid;
  int real_store_ruid;
  int use_ruid;
  bool all_offsets_match;
} reg_state[FIRST_PSEUDO_REGISTER];

/* Reverse linear uid.  This is increased in reload_combine while scanning
   the instructions from last to first.  It is used to set last_label_ruid
   and the store_ruid / use_ruid fields in reg_state.  */
static int reload_combine_ruid;

/* The RUID of the last label we encountered in reload_combine.  */
static int last_label_ruid;

/* The RUID of the last jump we encountered in reload_combine.  */
static int last_jump_ruid;

/* The register numbers of the first and last index register.  A value of
   -1 in LAST_INDEX_REG indicates that we've previously computed these
   values and found no suitable index registers.  */
static int first_index_reg = -1;
static int last_index_reg;

/* Liveness bit for LABEL, indexed relative to the lowest label number.
   NOTE(review): label_live / min_labelno are defined elsewhere in this
   file — confirm against the full source.  */
#define LABEL_LIVE(LABEL) \
  (label_live[CODE_LABEL_NUMBER (LABEL) - min_labelno])
701*38fd1498Szrj
/* Subroutine of reload_combine_split_ruids, called to fix up a single
   ruid pointed to by *PRUID if it is higher than SPLIT_RUID.  */

static inline void
reload_combine_split_one_ruid (int *pruid, int split_ruid)
{
  /* Shift the ruid up by one when it lies above the split point.  */
  *pruid += (*pruid > split_ruid) ? 1 : 0;
}
711*38fd1498Szrj
712*38fd1498Szrj /* Called when we insert a new insn in a position we've already passed in
713*38fd1498Szrj the scan. Examine all our state, increasing all ruids that are higher
714*38fd1498Szrj than SPLIT_RUID by one in order to make room for a new insn. */
715*38fd1498Szrj
716*38fd1498Szrj static void
reload_combine_split_ruids(int split_ruid)717*38fd1498Szrj reload_combine_split_ruids (int split_ruid)
718*38fd1498Szrj {
719*38fd1498Szrj unsigned i;
720*38fd1498Szrj
721*38fd1498Szrj reload_combine_split_one_ruid (&reload_combine_ruid, split_ruid);
722*38fd1498Szrj reload_combine_split_one_ruid (&last_label_ruid, split_ruid);
723*38fd1498Szrj reload_combine_split_one_ruid (&last_jump_ruid, split_ruid);
724*38fd1498Szrj
725*38fd1498Szrj for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
726*38fd1498Szrj {
727*38fd1498Szrj int j, idx = reg_state[i].use_index;
728*38fd1498Szrj reload_combine_split_one_ruid (®_state[i].use_ruid, split_ruid);
729*38fd1498Szrj reload_combine_split_one_ruid (®_state[i].store_ruid, split_ruid);
730*38fd1498Szrj reload_combine_split_one_ruid (®_state[i].real_store_ruid,
731*38fd1498Szrj split_ruid);
732*38fd1498Szrj if (idx < 0)
733*38fd1498Szrj continue;
734*38fd1498Szrj for (j = idx; j < RELOAD_COMBINE_MAX_USES; j++)
735*38fd1498Szrj {
736*38fd1498Szrj reload_combine_split_one_ruid (®_state[i].reg_use[j].ruid,
737*38fd1498Szrj split_ruid);
738*38fd1498Szrj }
739*38fd1498Szrj }
740*38fd1498Szrj }
741*38fd1498Szrj
/* Called when we are about to rescan a previously encountered insn with
   reload_combine_note_use after modifying some part of it.  This clears all
   information about uses in that particular insn.  */

static void
reload_combine_purge_insn_uses (rtx_insn *insn)
{
  unsigned i;

  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      int j, k, idx = reg_state[i].use_index;
      /* A negative use_index means we have no usable information for this
	 register, so there is nothing to purge.  */
      if (idx < 0)
	continue;
      /* The recorded uses occupy reg_use[idx .. RELOAD_COMBINE_MAX_USES-1].
	 Walk them from the end, compacting the entries we keep (those that
	 do not belong to INSN) towards the end of the array; K ends up as
	 the new first valid index.  */
      j = k = RELOAD_COMBINE_MAX_USES;
      while (j-- > idx)
	{
	  if (reg_state[i].reg_use[j].insn != insn)
	    {
	      k--;
	      if (k != j)
		reg_state[i].reg_use[k] = reg_state[i].reg_use[j];
	    }
	}
      reg_state[i].use_index = k;
    }
}
769*38fd1498Szrj
/* Called when we need to forget about all uses of REGNO after an insn
   which is identified by RUID.  */

static void
reload_combine_purge_reg_uses_after_ruid (unsigned regno, int ruid)
{
  int j, k, idx = reg_state[regno].use_index;
  /* Negative use_index: no (complete) use information; nothing to do.  */
  if (idx < 0)
    return;
  /* Ruids grow while scanning from the last insn to the first, so a use
     "after" RUID in program order has a ruid below RUID.  Keep only the
     entries with ruid >= RUID, compacting them towards the end of the
     reg_use array; K becomes the new first valid index.  */
  j = k = RELOAD_COMBINE_MAX_USES;
  while (j-- > idx)
    {
      if (reg_state[regno].reg_use[j].ruid >= ruid)
	{
	  k--;
	  if (k != j)
	    reg_state[regno].reg_use[k] = reg_state[regno].reg_use[j];
	}
    }
  reg_state[regno].use_index = k;
}
791*38fd1498Szrj
792*38fd1498Szrj /* Find the use of REGNO with the ruid that is highest among those
793*38fd1498Szrj lower than RUID_LIMIT, and return it if it is the only use of this
794*38fd1498Szrj reg in the insn. Return NULL otherwise. */
795*38fd1498Szrj
796*38fd1498Szrj static struct reg_use *
reload_combine_closest_single_use(unsigned regno,int ruid_limit)797*38fd1498Szrj reload_combine_closest_single_use (unsigned regno, int ruid_limit)
798*38fd1498Szrj {
799*38fd1498Szrj int i, best_ruid = 0;
800*38fd1498Szrj int use_idx = reg_state[regno].use_index;
801*38fd1498Szrj struct reg_use *retval;
802*38fd1498Szrj
803*38fd1498Szrj if (use_idx < 0)
804*38fd1498Szrj return NULL;
805*38fd1498Szrj retval = NULL;
806*38fd1498Szrj for (i = use_idx; i < RELOAD_COMBINE_MAX_USES; i++)
807*38fd1498Szrj {
808*38fd1498Szrj struct reg_use *use = reg_state[regno].reg_use + i;
809*38fd1498Szrj int this_ruid = use->ruid;
810*38fd1498Szrj if (this_ruid >= ruid_limit)
811*38fd1498Szrj continue;
812*38fd1498Szrj if (this_ruid > best_ruid)
813*38fd1498Szrj {
814*38fd1498Szrj best_ruid = this_ruid;
815*38fd1498Szrj retval = use;
816*38fd1498Szrj }
817*38fd1498Szrj else if (this_ruid == best_ruid)
818*38fd1498Szrj retval = NULL;
819*38fd1498Szrj }
820*38fd1498Szrj if (last_label_ruid >= best_ruid)
821*38fd1498Szrj return NULL;
822*38fd1498Szrj return retval;
823*38fd1498Szrj }
824*38fd1498Szrj
825*38fd1498Szrj /* After we've moved an add insn, fix up any debug insns that occur
826*38fd1498Szrj between the old location of the add and the new location. REG is
827*38fd1498Szrj the destination register of the add insn; REPLACEMENT is the
828*38fd1498Szrj SET_SRC of the add. FROM and TO specify the range in which we
829*38fd1498Szrj should make this change on debug insns. */
830*38fd1498Szrj
831*38fd1498Szrj static void
fixup_debug_insns(rtx reg,rtx replacement,rtx_insn * from,rtx_insn * to)832*38fd1498Szrj fixup_debug_insns (rtx reg, rtx replacement, rtx_insn *from, rtx_insn *to)
833*38fd1498Szrj {
834*38fd1498Szrj rtx_insn *insn;
835*38fd1498Szrj for (insn = from; insn != to; insn = NEXT_INSN (insn))
836*38fd1498Szrj {
837*38fd1498Szrj rtx t;
838*38fd1498Szrj
839*38fd1498Szrj if (!DEBUG_BIND_INSN_P (insn))
840*38fd1498Szrj continue;
841*38fd1498Szrj
842*38fd1498Szrj t = INSN_VAR_LOCATION_LOC (insn);
843*38fd1498Szrj t = simplify_replace_rtx (t, reg, replacement);
844*38fd1498Szrj validate_change (insn, &INSN_VAR_LOCATION_LOC (insn), t, 0);
845*38fd1498Szrj }
846*38fd1498Szrj }
847*38fd1498Szrj
/* Subroutine of reload_combine_recognize_const_pattern.  Try to replace REG
   with SRC in the insn described by USE, taking costs into account.  Return
   true if we made the replacement.  */

static bool
try_replace_in_use (struct reg_use *use, rtx reg, rtx src)
{
  rtx_insn *use_insn = use->insn;
  rtx mem = use->containing_mem;
  bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn));

  if (mem != NULL_RTX)
    {
      /* The use is inside a MEM address.  Substitute SRC into the address
	 and keep the change only if the new address is valid for the
	 address space and no more expensive than the old one.  */
      addr_space_t as = MEM_ADDR_SPACE (mem);
      rtx oldaddr = XEXP (mem, 0);
      rtx newaddr = NULL_RTX;
      int old_cost = address_cost (oldaddr, GET_MODE (mem), as, speed);
      int new_cost;

      newaddr = simplify_replace_rtx (oldaddr, reg, src);
      if (memory_address_addr_space_p (GET_MODE (mem), newaddr, as))
	{
	  /* Temporarily install the new address to query its cost in
	     context, then restore the old one; the real installation is
	     left to validate_change.  */
	  XEXP (mem, 0) = newaddr;
	  new_cost = address_cost (newaddr, GET_MODE (mem), as, speed);
	  XEXP (mem, 0) = oldaddr;
	  if (new_cost <= old_cost
	      && validate_change (use_insn,
				  &XEXP (mem, 0), newaddr, 0))
	    return true;
	}
    }
  else
    {
      /* The use is the REG operand of another REG2 = REG + CONST add;
	 fold SRC into its source, again only if the cost does not
	 increase.  */
      rtx new_set = single_set (use_insn);
      if (new_set
	  && REG_P (SET_DEST (new_set))
	  && GET_CODE (SET_SRC (new_set)) == PLUS
	  && REG_P (XEXP (SET_SRC (new_set), 0))
	  && CONSTANT_P (XEXP (SET_SRC (new_set), 1)))
	{
	  rtx new_src;
	  machine_mode mode = GET_MODE (SET_DEST (new_set));
	  int old_cost = set_src_cost (SET_SRC (new_set), mode, speed);

	  /* The recorded use must be the register operand of the PLUS.  */
	  gcc_assert (rtx_equal_p (XEXP (SET_SRC (new_set), 0), reg));
	  new_src = simplify_replace_rtx (SET_SRC (new_set), reg, src);

	  if (set_src_cost (new_src, mode, speed) <= old_cost
	      && validate_change (use_insn, &SET_SRC (new_set),
				  new_src, 0))
	    return true;
	}
    }
  return false;
}
903*38fd1498Szrj
/* Called by reload_combine when scanning INSN.  This function tries to detect
   patterns where a constant is added to a register, and the result is used
   in an address.
   Return true if no further processing is needed on INSN; false if it wasn't
   recognized and should be handled normally.  */

static bool
reload_combine_recognize_const_pattern (rtx_insn *insn)
{
  int from_ruid = reload_combine_ruid;
  rtx set, pat, reg, src, addreg;
  unsigned int regno;
  struct reg_use *use;
  bool must_move_add;
  rtx_insn *add_moved_after_insn = NULL;
  int add_moved_after_ruid = 0;
  int clobbered_regno = -1;

  set = single_set (insn);
  if (set == NULL_RTX)
    return false;

  reg = SET_DEST (set);
  src = SET_SRC (set);
  /* Only handle single-register Pmode destinations other than the stack
     pointer.  */
  if (!REG_P (reg)
      || REG_NREGS (reg) != 1
      || GET_MODE (reg) != Pmode
      || reg == stack_pointer_rtx)
    return false;

  regno = REGNO (reg);

  /* We look for a REG1 = REG2 + CONSTANT insn, followed by either
     uses of REG1 inside an address, or inside another add insn.  If
     possible and profitable, merge the addition into subsequent
     uses.  */
  if (GET_CODE (src) != PLUS
      || !REG_P (XEXP (src, 0))
      || !CONSTANT_P (XEXP (src, 1)))
    return false;

  addreg = XEXP (src, 0);
  /* If the add overwrites its own source (REG = REG + CONST), removing it
     requires moving it down past every use we merge it into.  */
  must_move_add = rtx_equal_p (reg, addreg);

  pat = PATTERN (insn);
  if (must_move_add && set != pat)
    {
      /* We have to be careful when moving the add; apart from the
	 single_set there may also be clobbers.  Recognize one special
	 case, that of one clobber alongside the set (likely a clobber
	 of the CC register).  */
      gcc_assert (GET_CODE (PATTERN (insn)) == PARALLEL);
      if (XVECLEN (pat, 0) != 2 || XVECEXP (pat, 0, 0) != set
	  || GET_CODE (XVECEXP (pat, 0, 1)) != CLOBBER
	  || !REG_P (XEXP (XVECEXP (pat, 0, 1), 0)))
	return false;
      clobbered_regno = REGNO (XEXP (XVECEXP (pat, 0, 1), 0));
    }

  /* Walk the recorded single uses of REGNO, closest first in scan order,
     trying to merge SRC into each.  */
  do
    {
      use = reload_combine_closest_single_use (regno, from_ruid);

      if (use)
	/* Start the search for the next use from here.  */
	from_ruid = use->ruid;

      if (use && GET_MODE (*use->usep) == Pmode)
	{
	  bool delete_add = false;
	  rtx_insn *use_insn = use->insn;
	  int use_ruid = use->ruid;

	  /* Avoid moving the add insn past a jump.  */
	  if (must_move_add && use_ruid <= last_jump_ruid)
	    break;

	  /* If the add clobbers another hard reg in parallel, don't move
	     it past a real set of this hard reg.  */
	  if (must_move_add && clobbered_regno >= 0
	      && reg_state[clobbered_regno].real_store_ruid >= use_ruid)
	    break;

	  /* Do not separate cc0 setter and cc0 user on HAVE_cc0 targets.  */
	  if (HAVE_cc0 && must_move_add && sets_cc0_p (PATTERN (use_insn)))
	    break;

	  gcc_assert (reg_state[regno].store_ruid <= use_ruid);
	  /* Avoid moving a use of ADDREG past a point where it is stored.  */
	  if (reg_state[REGNO (addreg)].store_ruid > use_ruid)
	    break;

	  /* We also must not move the addition past an insn that sets
	     the same register, unless we can combine two add insns.  */
	  if (must_move_add && reg_state[regno].store_ruid == use_ruid)
	    {
	      if (use->containing_mem == NULL_RTX)
		delete_add = true;
	      else
		break;
	    }

	  if (try_replace_in_use (use, reg, src))
	    {
	      /* The use insn changed; discard its recorded uses and
		 rescan it from scratch.  */
	      reload_combine_purge_insn_uses (use_insn);
	      reload_combine_note_use (&PATTERN (use_insn), use_insn,
				       use_ruid, NULL_RTX);

	      if (delete_add)
		{
		  fixup_debug_insns (reg, src, insn, use_insn);
		  delete_insn (insn);
		  return true;
		}
	      if (must_move_add)
		{
		  add_moved_after_insn = use_insn;
		  add_moved_after_ruid = use_ruid;
		}
	      continue;
	    }
	}
      /* If we get here, we couldn't handle this use.  */
      if (must_move_add)
	break;
    }
  while (use);

  if (!must_move_add || add_moved_after_insn == NULL_RTX)
    /* Process the add normally.  */
    return false;

  /* Move the add past the last use we merged it into, and bring the scan
     state (debug insns, recorded uses, ruids) back into sync with its new
     position.  */
  fixup_debug_insns (reg, src, insn, add_moved_after_insn);

  reorder_insns (insn, insn, add_moved_after_insn);
  reload_combine_purge_reg_uses_after_ruid (regno, add_moved_after_ruid);
  reload_combine_split_ruids (add_moved_after_ruid - 1);
  reload_combine_note_use (&PATTERN (insn), insn,
			   add_moved_after_ruid, NULL_RTX);
  reg_state[regno].store_ruid = add_moved_after_ruid;

  return true;
}
1047*38fd1498Szrj
/* Called by reload_combine when scanning INSN.  Try to detect a pattern we
   can handle and improve.  Return true if no further processing is needed on
   INSN; false if it wasn't recognized and should be handled normally.  */

static bool
reload_combine_recognize_pattern (rtx_insn *insn)
{
  rtx set, reg, src;

  set = single_set (insn);
  if (set == NULL_RTX)
    return false;

  reg = SET_DEST (set);
  src = SET_SRC (set);
  if (!REG_P (reg) || REG_NREGS (reg) != 1)
    return false;

  unsigned int regno = REGNO (reg);
  machine_mode mode = GET_MODE (reg);

  /* use_index < 0 means we have no usable information for REGNO;
     use_index == RELOAD_COMBINE_MAX_USES means no uses were recorded
     (the reg_use array is filled from the end).  Either way, give up.  */
  if (reg_state[regno].use_index < 0
      || reg_state[regno].use_index >= RELOAD_COMBINE_MAX_USES)
    return false;

  /* Every recorded use must be in MODE, or substituting into it below
     would be invalid.  */
  for (int i = reg_state[regno].use_index;
       i < RELOAD_COMBINE_MAX_USES; i++)
    {
      struct reg_use *use = reg_state[regno].reg_use + i;
      if (GET_MODE (*use->usep) != mode)
	return false;
    }

  /* Look for (set (REGX) (CONST_INT))
     (set (REGX) (PLUS (REGX) (REGY)))
     ...
     ... (MEM (REGX)) ...
     and convert it to
     (set (REGZ) (CONST_INT))
     ...
     ... (MEM (PLUS (REGZ) (REGY)))... .

     First, check that we have (set (REGX) (PLUS (REGX) (REGY)))
     and that we know all uses of REGX before it dies.
     Also, explicitly check that REGX != REGY; our life information
     does not yet show whether REGY changes in this insn.  */

  if (GET_CODE (src) == PLUS
      && reg_state[regno].all_offsets_match
      && last_index_reg != -1
      && REG_P (XEXP (src, 1))
      && rtx_equal_p (XEXP (src, 0), reg)
      && !rtx_equal_p (XEXP (src, 1), reg)
      && last_label_ruid < reg_state[regno].use_ruid)
    {
      rtx base = XEXP (src, 1);
      rtx_insn *prev = prev_nonnote_nondebug_insn (insn);
      rtx prev_set = prev ? single_set (prev) : NULL_RTX;
      rtx index_reg = NULL_RTX;
      rtx reg_sum = NULL_RTX;
      int i;

      /* Now we need to set INDEX_REG to an index register (denoted as
	 REGZ in the illustration above) and REG_SUM to the expression
	 register+register that we want to use to substitute uses of REG
	 (typically in MEMs) with.  First check REG and BASE for being
	 index registers; we can use them even if they are not dead.  */
      if (TEST_HARD_REG_BIT (reg_class_contents[INDEX_REG_CLASS], regno)
	  || TEST_HARD_REG_BIT (reg_class_contents[INDEX_REG_CLASS],
				REGNO (base)))
	{
	  index_reg = reg;
	  reg_sum = src;
	}
      else
	{
	  /* Otherwise, look for a free index register.  Since we have
	     checked above that neither REG nor BASE are index registers,
	     if we find anything at all, it will be different from these
	     two registers.  */
	  for (i = first_index_reg; i <= last_index_reg; i++)
	    {
	      /* The candidate must be an index register with no recorded
		 uses, not stored after REG's last use, either
		 call-clobbered or already live in this function (so we do
		 not create a new need to save it), not the frame pointer
		 when one is needed, not fixed or global, single-register
		 in REG's mode, and usable as a scratch on this target.  */
	      if (TEST_HARD_REG_BIT (reg_class_contents[INDEX_REG_CLASS], i)
		  && reg_state[i].use_index == RELOAD_COMBINE_MAX_USES
		  && reg_state[i].store_ruid <= reg_state[regno].use_ruid
		  && (call_used_regs[i] || df_regs_ever_live_p (i))
		  && (!frame_pointer_needed || i != HARD_FRAME_POINTER_REGNUM)
		  && !fixed_regs[i] && !global_regs[i]
		  && hard_regno_nregs (i, GET_MODE (reg)) == 1
		  && targetm.hard_regno_scratch_ok (i))
		{
		  index_reg = gen_rtx_REG (GET_MODE (reg), i);
		  reg_sum = gen_rtx_PLUS (GET_MODE (reg), index_reg, base);
		  break;
		}
	    }
	}

      /* Check that PREV_SET is indeed (set (REGX) (CONST_INT)) and that
	 (REGY), i.e. BASE, is not clobbered before the last use we'll
	 create.  */
      if (reg_sum
	  && prev_set
	  && CONST_INT_P (SET_SRC (prev_set))
	  && rtx_equal_p (SET_DEST (prev_set), reg)
	  && (reg_state[REGNO (base)].store_ruid
	      <= reg_state[regno].use_ruid))
	{
	  /* Change destination register and, if necessary, the constant
	     value in PREV, the constant loading instruction.  */
	  validate_change (prev, &SET_DEST (prev_set), index_reg, 1);
	  if (reg_state[regno].offset != const0_rtx)
	    {
	      /* Fold the accumulated offset of the recorded uses into the
		 constant that PREV loads.  */
	      HOST_WIDE_INT c
		= trunc_int_for_mode (UINTVAL (SET_SRC (prev_set))
				      + UINTVAL (reg_state[regno].offset),
				      GET_MODE (index_reg));
	      validate_change (prev, &SET_SRC (prev_set), GEN_INT (c), 1);
	    }

	  /* Now for every use of REG that we have recorded, replace REG
	     with REG_SUM.  */
	  for (i = reg_state[regno].use_index;
	       i < RELOAD_COMBINE_MAX_USES; i++)
	    validate_unshare_change (reg_state[regno].reg_use[i].insn,
				     reg_state[regno].reg_use[i].usep,
				     /* Each change must have its own
					replacement.  */
				     reg_sum, 1);

	  if (apply_change_group ())
	    {
	      struct reg_use *lowest_ruid = NULL;

	      /* For every new use of REG_SUM, we have to record the use
		 of BASE therein, i.e. operand 1.  */
	      for (i = reg_state[regno].use_index;
		   i < RELOAD_COMBINE_MAX_USES; i++)
		{
		  struct reg_use *use = reg_state[regno].reg_use + i;
		  reload_combine_note_use (&XEXP (*use->usep, 1), use->insn,
					   use->ruid, use->containing_mem);
		  /* Track the use latest in program order (lowest ruid);
		     debug insns beyond it need fixing up.  */
		  if (lowest_ruid == NULL || use->ruid < lowest_ruid->ruid)
		    lowest_ruid = use;
		}

	      fixup_debug_insns (reg, reg_sum, insn, lowest_ruid->insn);

	      /* Delete the reg-reg addition.  */
	      delete_insn (insn);

	      if (reg_state[regno].offset != const0_rtx
		  /* Previous REG_EQUIV / REG_EQUAL notes for PREV
		     are now invalid.  */
		  && remove_reg_equal_equiv_notes (prev))
		df_notes_rescan (prev);

	      reg_state[regno].use_index = RELOAD_COMBINE_MAX_USES;
	      return true;
	    }
	}
    }
  return false;
}
1212*38fd1498Szrj
/* Scan all insns from last to first, maintaining per-register use/store
   information in reg_state, and try to improve each insn via
   reload_combine_recognize_const_pattern and
   reload_combine_recognize_pattern.  */

static void
reload_combine (void)
{
  rtx_insn *insn, *prev;
  basic_block bb;
  unsigned int r;
  int min_labelno, n_labels;
  HARD_REG_SET ever_live_at_start, *label_live;

  /* To avoid wasting too much time later searching for an index register,
     determine the minimum and maximum index register numbers.  */
  if (INDEX_REG_CLASS == NO_REGS)
    last_index_reg = -1;
  else if (first_index_reg == -1 && last_index_reg == 0)
    {
      /* first_index_reg == -1 && last_index_reg == 0 is the initial
	 state; compute the range once and cache it in the statics.  */
      for (r = 0; r < FIRST_PSEUDO_REGISTER; r++)
	if (TEST_HARD_REG_BIT (reg_class_contents[INDEX_REG_CLASS], r))
	  {
	    if (first_index_reg == -1)
	      first_index_reg = r;

	    last_index_reg = r;
	  }

      /* If no index register is available, we can quit now.  Set LAST_INDEX_REG
	 to -1 so we'll know to quit early the next time we get here.  */
      if (first_index_reg == -1)
	{
	  last_index_reg = -1;
	  return;
	}
    }

  /* Set up LABEL_LIVE and EVER_LIVE_AT_START.  The register lifetime
     information is a bit fuzzy immediately after reload, but it's
     still good enough to determine which registers are live at a jump
     destination.  */
  min_labelno = get_first_label_num ();
  n_labels = max_label_num () - min_labelno;
  label_live = XNEWVEC (HARD_REG_SET, n_labels);
  CLEAR_HARD_REG_SET (ever_live_at_start);

  FOR_EACH_BB_REVERSE_FN (bb, cfun)
    {
      insn = BB_HEAD (bb);
      if (LABEL_P (insn))
	{
	  HARD_REG_SET live;
	  bitmap live_in = df_get_live_in (bb);

	  REG_SET_TO_HARD_REG_SET (live, live_in);
	  compute_use_by_pseudos (&live, live_in);
	  COPY_HARD_REG_SET (LABEL_LIVE (insn), live);
	  IOR_HARD_REG_SET (ever_live_at_start, live);
	}
    }

  /* Initialize last_label_ruid, reload_combine_ruid and reg_state.  */
  last_label_ruid = last_jump_ruid = reload_combine_ruid = 0;
  for (r = 0; r < FIRST_PSEUDO_REGISTER; r++)
    {
      reg_state[r].store_ruid = 0;
      reg_state[r].real_store_ruid = 0;
      if (fixed_regs[r])
	reg_state[r].use_index = -1;
      else
	reg_state[r].use_index = RELOAD_COMBINE_MAX_USES;
    }

  /* Scan backwards; ruids increase as we go from the last insn towards
     the first.  */
  for (insn = get_last_insn (); insn; insn = prev)
    {
      bool control_flow_insn;
      rtx note;

      prev = PREV_INSN (insn);

      /* We cannot do our optimization across labels.  Invalidating all the use
	 information we have would be costly, so we just note where the label
	 is and then later disable any optimization that would cross it.  */
      if (LABEL_P (insn))
	last_label_ruid = reload_combine_ruid;
      else if (BARRIER_P (insn))
	{
	  /* Crossing a barrier resets all the use information.  */
	  for (r = 0; r < FIRST_PSEUDO_REGISTER; r++)
	    if (! fixed_regs[r])
	      reg_state[r].use_index = RELOAD_COMBINE_MAX_USES;
	}
      else if (INSN_P (insn) && volatile_insn_p (PATTERN (insn)))
	/* Optimizations across insns being marked as volatile must be
	   prevented.  All the usage information is invalidated
	   here.  */
	for (r = 0; r < FIRST_PSEUDO_REGISTER; r++)
	  if (! fixed_regs[r]
	      && reg_state[r].use_index != RELOAD_COMBINE_MAX_USES)
	    reg_state[r].use_index = -1;

      if (! NONDEBUG_INSN_P (insn))
	continue;

      reload_combine_ruid++;

      control_flow_insn = control_flow_insn_p (insn);
      if (control_flow_insn)
	last_jump_ruid = reload_combine_ruid;

      /* If one of the recognizers handled INSN completely, don't record
	 its stores and uses below.  */
      if (reload_combine_recognize_const_pattern (insn)
	  || reload_combine_recognize_pattern (insn))
	continue;

      note_stores (PATTERN (insn), reload_combine_note_store, NULL);

      if (CALL_P (insn))
	{
	  rtx link;
	  HARD_REG_SET used_regs;

	  get_call_reg_set_usage (insn, &used_regs, call_used_reg_set);

	  /* Registers clobbered by the call are stored here, and their
	     recorded uses are discarded.  */
	  for (r = 0; r < FIRST_PSEUDO_REGISTER; r++)
	    if (TEST_HARD_REG_BIT (used_regs, r))
	      {
		reg_state[r].use_index = RELOAD_COMBINE_MAX_USES;
		reg_state[r].store_ruid = reload_combine_ruid;
	      }

	  /* Walk CALL_INSN_FUNCTION_USAGE: CLOBBERs are treated as stores,
	     USEs invalidate the use information entirely.  */
	  for (link = CALL_INSN_FUNCTION_USAGE (insn); link;
	       link = XEXP (link, 1))
	    {
	      rtx setuse = XEXP (link, 0);
	      rtx usage_rtx = XEXP (setuse, 0);
	      if ((GET_CODE (setuse) == USE || GET_CODE (setuse) == CLOBBER)
		  && REG_P (usage_rtx))
	        {
		  unsigned int end_regno = END_REGNO (usage_rtx);
		  for (unsigned int i = REGNO (usage_rtx); i < end_regno; ++i)
		    if (GET_CODE (XEXP (link, 0)) == CLOBBER)
		      {
		        reg_state[i].use_index = RELOAD_COMBINE_MAX_USES;
		        reg_state[i].store_ruid = reload_combine_ruid;
		      }
		    else
		      reg_state[i].use_index = -1;
	         }
	     }
	}

      if (control_flow_insn && !ANY_RETURN_P (PATTERN (insn)))
	{
	  /* Non-spill registers might be used at the call destination in
	     some unknown fashion, so we have to mark the unknown use.  */
	  HARD_REG_SET *live;

	  if ((condjump_p (insn) || condjump_in_parallel_p (insn))
	      && JUMP_LABEL (insn))
	    {
	      if (ANY_RETURN_P (JUMP_LABEL (insn)))
		live = NULL;
	      else
		live = &LABEL_LIVE (JUMP_LABEL (insn));
	    }
	  else
	    /* Unknown destination: assume anything live at any label
	       may be used.  */
	    live = &ever_live_at_start;

	  if (live)
	    for (r = 0; r < FIRST_PSEUDO_REGISTER; r++)
	      if (TEST_HARD_REG_BIT (*live, r))
		reg_state[r].use_index = -1;
	}

      reload_combine_note_use (&PATTERN (insn), insn, reload_combine_ruid,
			       NULL_RTX);

      for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
	{
	  if (REG_NOTE_KIND (note) == REG_INC && REG_P (XEXP (note, 0)))
	    {
	      /* An auto-increment both uses and sets the register; treat
		 it as stored here and invalidate its recorded uses.  */
	      int regno = REGNO (XEXP (note, 0));
	      reg_state[regno].store_ruid = reload_combine_ruid;
	      reg_state[regno].real_store_ruid = reload_combine_ruid;
	      reg_state[regno].use_index = -1;
	    }
	}
    }

  free (label_live);
}
1400*38fd1498Szrj
1401*38fd1498Szrj /* Check if DST is a register or a subreg of a register; if it is,
1402*38fd1498Szrj update store_ruid, real_store_ruid and use_index in the reg_state
1403*38fd1498Szrj structure accordingly. Called via note_stores from reload_combine. */
1404*38fd1498Szrj
1405*38fd1498Szrj static void
reload_combine_note_store(rtx dst,const_rtx set,void * data ATTRIBUTE_UNUSED)1406*38fd1498Szrj reload_combine_note_store (rtx dst, const_rtx set, void *data ATTRIBUTE_UNUSED)
1407*38fd1498Szrj {
1408*38fd1498Szrj int regno = 0;
1409*38fd1498Szrj int i;
1410*38fd1498Szrj machine_mode mode = GET_MODE (dst);
1411*38fd1498Szrj
1412*38fd1498Szrj if (GET_CODE (dst) == SUBREG)
1413*38fd1498Szrj {
1414*38fd1498Szrj regno = subreg_regno_offset (REGNO (SUBREG_REG (dst)),
1415*38fd1498Szrj GET_MODE (SUBREG_REG (dst)),
1416*38fd1498Szrj SUBREG_BYTE (dst),
1417*38fd1498Szrj GET_MODE (dst));
1418*38fd1498Szrj dst = SUBREG_REG (dst);
1419*38fd1498Szrj }
1420*38fd1498Szrj
1421*38fd1498Szrj /* Some targets do argument pushes without adding REG_INC notes. */
1422*38fd1498Szrj
1423*38fd1498Szrj if (MEM_P (dst))
1424*38fd1498Szrj {
1425*38fd1498Szrj dst = XEXP (dst, 0);
1426*38fd1498Szrj if (GET_CODE (dst) == PRE_INC || GET_CODE (dst) == POST_INC
1427*38fd1498Szrj || GET_CODE (dst) == PRE_DEC || GET_CODE (dst) == POST_DEC
1428*38fd1498Szrj || GET_CODE (dst) == PRE_MODIFY || GET_CODE (dst) == POST_MODIFY)
1429*38fd1498Szrj {
1430*38fd1498Szrj unsigned int end_regno = END_REGNO (XEXP (dst, 0));
1431*38fd1498Szrj for (unsigned int i = REGNO (XEXP (dst, 0)); i < end_regno; ++i)
1432*38fd1498Szrj {
1433*38fd1498Szrj /* We could probably do better, but for now mark the register
1434*38fd1498Szrj as used in an unknown fashion and set/clobbered at this
1435*38fd1498Szrj insn. */
1436*38fd1498Szrj reg_state[i].use_index = -1;
1437*38fd1498Szrj reg_state[i].store_ruid = reload_combine_ruid;
1438*38fd1498Szrj reg_state[i].real_store_ruid = reload_combine_ruid;
1439*38fd1498Szrj }
1440*38fd1498Szrj }
1441*38fd1498Szrj else
1442*38fd1498Szrj return;
1443*38fd1498Szrj }
1444*38fd1498Szrj
1445*38fd1498Szrj if (!REG_P (dst))
1446*38fd1498Szrj return;
1447*38fd1498Szrj regno += REGNO (dst);
1448*38fd1498Szrj
1449*38fd1498Szrj /* note_stores might have stripped a STRICT_LOW_PART, so we have to be
1450*38fd1498Szrj careful with registers / register parts that are not full words.
1451*38fd1498Szrj Similarly for ZERO_EXTRACT. */
1452*38fd1498Szrj if (GET_CODE (SET_DEST (set)) == ZERO_EXTRACT
1453*38fd1498Szrj || GET_CODE (SET_DEST (set)) == STRICT_LOW_PART)
1454*38fd1498Szrj {
1455*38fd1498Szrj for (i = end_hard_regno (mode, regno) - 1; i >= regno; i--)
1456*38fd1498Szrj {
1457*38fd1498Szrj reg_state[i].use_index = -1;
1458*38fd1498Szrj reg_state[i].store_ruid = reload_combine_ruid;
1459*38fd1498Szrj reg_state[i].real_store_ruid = reload_combine_ruid;
1460*38fd1498Szrj }
1461*38fd1498Szrj }
1462*38fd1498Szrj else
1463*38fd1498Szrj {
1464*38fd1498Szrj for (i = end_hard_regno (mode, regno) - 1; i >= regno; i--)
1465*38fd1498Szrj {
1466*38fd1498Szrj reg_state[i].store_ruid = reload_combine_ruid;
1467*38fd1498Szrj if (GET_CODE (set) == SET)
1468*38fd1498Szrj reg_state[i].real_store_ruid = reload_combine_ruid;
1469*38fd1498Szrj reg_state[i].use_index = RELOAD_COMBINE_MAX_USES;
1470*38fd1498Szrj }
1471*38fd1498Szrj }
1472*38fd1498Szrj }
1473*38fd1498Szrj
1474*38fd1498Szrj /* XP points to a piece of rtl that has to be checked for any uses of
1475*38fd1498Szrj registers.
1476*38fd1498Szrj *XP is the pattern of INSN, or a part of it.
1477*38fd1498Szrj Called from reload_combine, and recursively by itself. */
static void
reload_combine_note_use (rtx *xp, rtx_insn *insn, int ruid, rtx containing_mem)
{
  rtx x = *xp;
  enum rtx_code code = x->code;
  const char *fmt;
  int i, j;
  rtx offset = const0_rtx; /* For the REG case below.  */

  switch (code)
    {
    case SET:
      /* The destination of a plain register SET is a definition, not a
	 use; only the source needs scanning.  More complex destinations
	 (MEM, SUBREG, ...) fall through to the generic recursion.  */
      if (REG_P (SET_DEST (x)))
	{
	  reload_combine_note_use (&SET_SRC (x), insn, ruid, NULL_RTX);
	  return;
	}
      break;

    case USE:
      /* If this is the USE of a return value, we can't change it.  */
      if (REG_P (XEXP (x, 0)) && REG_FUNCTION_VALUE_P (XEXP (x, 0)))
	{
	  /* Mark the return register as used in an unknown fashion.  */
	  rtx reg = XEXP (x, 0);
	  unsigned int end_regno = END_REGNO (reg);
	  for (unsigned int regno = REGNO (reg); regno < end_regno; ++regno)
	    reg_state[regno].use_index = -1;
	  return;
	}
      break;

    case CLOBBER:
      /* A register CLOBBER is not a use at all; just check it is sane.  */
      if (REG_P (SET_DEST (x)))
	{
	  /* No spurious CLOBBERs of pseudo registers may remain.  */
	  gcc_assert (REGNO (SET_DEST (x)) < FIRST_PSEUDO_REGISTER);
	  return;
	}
      break;

    case PLUS:
      /* We are interested in (plus (reg) (const_int)) .  */
      if (!REG_P (XEXP (x, 0))
	  || !CONST_INT_P (XEXP (x, 1)))
	break;
      /* Record the constant addend and treat the whole PLUS as a use of
	 its base register at that offset.  */
      offset = XEXP (x, 1);
      x = XEXP (x, 0);
      /* Fall through.  */
    case REG:
      {
	int regno = REGNO (x);
	int use_index;
	int nregs;

	/* No spurious USEs of pseudo registers may remain.  */
	gcc_assert (regno < FIRST_PSEUDO_REGISTER);

	nregs = REG_NREGS (x);

	/* We can't substitute into multi-hard-reg uses.  */
	if (nregs > 1)
	  {
	    while (--nregs >= 0)
	      reg_state[regno + nregs].use_index = -1;
	    return;
	  }

	/* We may be called to update uses in previously seen insns.
	   Don't add uses beyond the last store we saw.  */
	if (ruid < reg_state[regno].store_ruid)
	  return;

	/* If this register is already used in some unknown fashion, we
	   can't do anything.
	   If we decrement the index from zero to -1, we can't store more
	   uses, so this register becomes used in an unknown fashion.  */
	use_index = --reg_state[regno].use_index;
	if (use_index < 0)
	  return;

	if (use_index == RELOAD_COMBINE_MAX_USES - 1)
	  {
	    /* This is the first use of this register we have seen since we
	       marked it as dead.  */
	    reg_state[regno].offset = offset;
	    reg_state[regno].all_offsets_match = true;
	    reg_state[regno].use_ruid = ruid;
	  }
	else
	  {
	    /* Subsequent use: keep the earliest ruid seen, and track
	       whether every use adds the same constant offset.  */
	    if (reg_state[regno].use_ruid > ruid)
	      reg_state[regno].use_ruid = ruid;

	    if (! rtx_equal_p (offset, reg_state[regno].offset))
	      reg_state[regno].all_offsets_match = false;
	  }

	/* Record this use in the next free slot so reload_combine can
	   later substitute into it via usep.  */
	reg_state[regno].reg_use[use_index].insn = insn;
	reg_state[regno].reg_use[use_index].ruid = ruid;
	reg_state[regno].reg_use[use_index].containing_mem = containing_mem;
	reg_state[regno].reg_use[use_index].usep = xp;
	return;
      }

    case MEM:
      /* Remember the innermost enclosing MEM while scanning its
	 address, so uses inside addresses can be identified.  */
      containing_mem = x;
      break;

    default:
      break;
    }

  /* Recursively process the components of X.  */
  fmt = GET_RTX_FORMAT (code);
  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'e')
	reload_combine_note_use (&XEXP (x, i), insn, ruid, containing_mem);
      else if (fmt[i] == 'E')
	{
	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    reload_combine_note_use (&XVECEXP (x, i, j), insn, ruid,
				     containing_mem);
	}
    }
}
1605*38fd1498Szrj
/* See if we can reduce the cost of a constant by replacing a move
   with an add.  We track situations in which a register is set to a
   constant or to a register plus a constant.  */
/* We cannot do our optimization across labels.  Invalidating all the
   information about register contents we have would be costly, so we
   use move2add_last_label_luid to note where the label is and then
   later disable any optimization that would cross it.
   reg_offset[n] / reg_base_reg[n] / reg_symbol_ref[n] / reg_mode[n]
   are only valid if reg_set_luid[n] is greater than
   move2add_last_label_luid.
   For a set that established a new (potential) base register with
   non-constant value, we use move2add_luid from the place where the
   setting insn is encountered; registers based off that base then
   get the same reg_set_luid.  Constants all get
   move2add_last_label_luid + 1 as their reg_set_luid.  */
static int reg_set_luid[FIRST_PSEUDO_REGISTER];

/* If reg_base_reg[n] is negative, register n has been set to
   reg_offset[n] or reg_symbol_ref[n] + reg_offset[n] in mode reg_mode[n].
   If reg_base_reg[n] is non-negative, register n has been set to the
   sum of reg_offset[n] and the value of register reg_base_reg[n]
   before reg_set_luid[n], calculated in mode reg_mode[n] .
   For multi-hard-register registers, all but the first one are
   recorded as BLKmode in reg_mode.  Setting reg_mode to VOIDmode
   marks it as invalid.  */
static HOST_WIDE_INT reg_offset[FIRST_PSEUDO_REGISTER];
static int reg_base_reg[FIRST_PSEUDO_REGISTER];
static rtx reg_symbol_ref[FIRST_PSEUDO_REGISTER];
static machine_mode reg_mode[FIRST_PSEUDO_REGISTER];

/* move2add_luid is linearly increased while scanning the instructions
   from first to last.  It is used to set reg_set_luid in
   reload_cse_move2add and move2add_note_store.  */
static int move2add_luid;

/* move2add_last_label_luid is set whenever a label is found.  Labels
   invalidate all previously collected reg_offset data.  */
static int move2add_last_label_luid;

/* Nonzero if a value recorded in INMODE remains valid when read in the
   (same-size or narrower) mode OUTMODE.
   ??? We don't know how zero / sign extension is handled, hence we
   can't go from a narrower to a wider mode.  */
#define MODES_OK_FOR_MOVE2ADD(OUTMODE, INMODE) \
  (GET_MODE_SIZE (OUTMODE) == GET_MODE_SIZE (INMODE) \
   || (GET_MODE_SIZE (OUTMODE) <= GET_MODE_SIZE (INMODE) \
       && TRULY_NOOP_TRUNCATION_MODES_P (OUTMODE, INMODE)))
1651*38fd1498Szrj
1652*38fd1498Szrj /* Record that REG is being set to a value with the mode of REG. */
1653*38fd1498Szrj
1654*38fd1498Szrj static void
move2add_record_mode(rtx reg)1655*38fd1498Szrj move2add_record_mode (rtx reg)
1656*38fd1498Szrj {
1657*38fd1498Szrj int regno, nregs;
1658*38fd1498Szrj machine_mode mode = GET_MODE (reg);
1659*38fd1498Szrj
1660*38fd1498Szrj if (GET_CODE (reg) == SUBREG)
1661*38fd1498Szrj {
1662*38fd1498Szrj regno = subreg_regno (reg);
1663*38fd1498Szrj nregs = subreg_nregs (reg);
1664*38fd1498Szrj }
1665*38fd1498Szrj else if (REG_P (reg))
1666*38fd1498Szrj {
1667*38fd1498Szrj regno = REGNO (reg);
1668*38fd1498Szrj nregs = REG_NREGS (reg);
1669*38fd1498Szrj }
1670*38fd1498Szrj else
1671*38fd1498Szrj gcc_unreachable ();
1672*38fd1498Szrj for (int i = nregs - 1; i > 0; i--)
1673*38fd1498Szrj reg_mode[regno + i] = BLKmode;
1674*38fd1498Szrj reg_mode[regno] = mode;
1675*38fd1498Szrj }
1676*38fd1498Szrj
1677*38fd1498Szrj /* Record that REG is being set to the sum of SYM and OFF. */
1678*38fd1498Szrj
1679*38fd1498Szrj static void
move2add_record_sym_value(rtx reg,rtx sym,rtx off)1680*38fd1498Szrj move2add_record_sym_value (rtx reg, rtx sym, rtx off)
1681*38fd1498Szrj {
1682*38fd1498Szrj int regno = REGNO (reg);
1683*38fd1498Szrj
1684*38fd1498Szrj move2add_record_mode (reg);
1685*38fd1498Szrj reg_set_luid[regno] = move2add_luid;
1686*38fd1498Szrj reg_base_reg[regno] = -1;
1687*38fd1498Szrj reg_symbol_ref[regno] = sym;
1688*38fd1498Szrj reg_offset[regno] = INTVAL (off);
1689*38fd1498Szrj }
1690*38fd1498Szrj
1691*38fd1498Szrj /* Check if REGNO contains a valid value in MODE. */
1692*38fd1498Szrj
1693*38fd1498Szrj static bool
move2add_valid_value_p(int regno,scalar_int_mode mode)1694*38fd1498Szrj move2add_valid_value_p (int regno, scalar_int_mode mode)
1695*38fd1498Szrj {
1696*38fd1498Szrj if (reg_set_luid[regno] <= move2add_last_label_luid)
1697*38fd1498Szrj return false;
1698*38fd1498Szrj
1699*38fd1498Szrj if (mode != reg_mode[regno])
1700*38fd1498Szrj {
1701*38fd1498Szrj scalar_int_mode old_mode;
1702*38fd1498Szrj if (!is_a <scalar_int_mode> (reg_mode[regno], &old_mode)
1703*38fd1498Szrj || !MODES_OK_FOR_MOVE2ADD (mode, old_mode))
1704*38fd1498Szrj return false;
1705*38fd1498Szrj /* The value loaded into regno in reg_mode[regno] is also valid in
1706*38fd1498Szrj mode after truncation only if (REG:mode regno) is the lowpart of
1707*38fd1498Szrj (REG:reg_mode[regno] regno). Now, for big endian, the starting
1708*38fd1498Szrj regno of the lowpart might be different. */
1709*38fd1498Szrj poly_int64 s_off = subreg_lowpart_offset (mode, old_mode);
1710*38fd1498Szrj s_off = subreg_regno_offset (regno, old_mode, s_off, mode);
1711*38fd1498Szrj if (maybe_ne (s_off, 0))
1712*38fd1498Szrj /* We could in principle adjust regno, check reg_mode[regno] to be
1713*38fd1498Szrj BLKmode, and return s_off to the caller (vs. -1 for failure),
1714*38fd1498Szrj but we currently have no callers that could make use of this
1715*38fd1498Szrj information. */
1716*38fd1498Szrj return false;
1717*38fd1498Szrj }
1718*38fd1498Szrj
1719*38fd1498Szrj for (int i = end_hard_regno (mode, regno) - 1; i > regno; i--)
1720*38fd1498Szrj if (reg_mode[i] != BLKmode)
1721*38fd1498Szrj return false;
1722*38fd1498Szrj return true;
1723*38fd1498Szrj }
1724*38fd1498Szrj
1725*38fd1498Szrj /* This function is called with INSN that sets REG (of mode MODE)
1726*38fd1498Szrj to (SYM + OFF), while REG is known to already have value (SYM + offset).
1727*38fd1498Szrj This function tries to change INSN into an add instruction
1728*38fd1498Szrj (set (REG) (plus (REG) (OFF - offset))) using the known value.
1729*38fd1498Szrj It also updates the information about REG's known value.
1730*38fd1498Szrj Return true if we made a change. */
1731*38fd1498Szrj
static bool
move2add_use_add2_insn (scalar_int_mode mode, rtx reg, rtx sym, rtx off,
			rtx_insn *insn)
{
  rtx pat = PATTERN (insn);
  rtx src = SET_SRC (pat);
  int regno = REGNO (reg);
  /* The delta that must be added to REG's known contents to reach OFF.  */
  rtx new_src = gen_int_mode (UINTVAL (off) - reg_offset[regno], mode);
  bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
  bool changed = false;

  /* (set (reg) (plus (reg) (const_int 0))) is not canonical;
     use (set (reg) (reg)) instead.
     We don't delete this insn, nor do we convert it into a
     note, to avoid losing register notes or the return
     value flag.  jump2 already knows how to get rid of
     no-op moves.  */
  if (new_src == const0_rtx)
    {
      /* If the constants are different, this is a
	 truncation, that, if turned into (set (reg)
	 (reg)), would be discarded.  Maybe we should
	 try a truncMN pattern?  */
      if (INTVAL (off) == reg_offset [regno])
	changed = validate_change (insn, &SET_SRC (pat), reg, 0);
    }
  else
    {
      struct full_rtx_costs oldcst, newcst;
      rtx tem = gen_rtx_PLUS (mode, reg, new_src);

      /* Cost the add form by temporarily splicing the PLUS into the
	 pattern, then restore the original source.  */
      get_full_set_rtx_cost (pat, &oldcst);
      SET_SRC (pat) = tem;
      get_full_set_rtx_cost (pat, &newcst);
      SET_SRC (pat) = src;

      if (costs_lt_p (&newcst, &oldcst, speed)
	  && have_add2_insn (reg, new_src))
	changed = validate_change (insn, &SET_SRC (pat), tem, 0);
      else if (sym == NULL_RTX && mode != BImode)
	{
	  /* The add is no win.  For a plain constant, try instead to
	     rewrite only the low part via a STRICT_LOW_PART store in a
	     narrower mode, provided the high bits already match.  */
	  scalar_int_mode narrow_mode;
	  FOR_EACH_MODE_UNTIL (narrow_mode, mode)
	    {
	      if (have_insn_for (STRICT_LOW_PART, narrow_mode)
		  && ((reg_offset[regno] & ~GET_MODE_MASK (narrow_mode))
		      == (INTVAL (off) & ~GET_MODE_MASK (narrow_mode))))
		{
		  rtx narrow_reg = gen_lowpart_common (narrow_mode, reg);
		  rtx narrow_src = gen_int_mode (INTVAL (off),
						 narrow_mode);
		  rtx new_set
		    = gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode,
							    narrow_reg),
				   narrow_src);
		  get_full_set_rtx_cost (new_set, &newcst);
		  if (costs_lt_p (&newcst, &oldcst, speed))
		    {
		      changed = validate_change (insn, &PATTERN (insn),
						 new_set, 0);
		      if (changed)
			break;
		    }
		}
	    }
	}
    }
  /* Whether or not the replacement succeeded, REG now holds SYM + OFF;
     record that for subsequent insns.  */
  move2add_record_sym_value (reg, sym, off);
  return changed;
}
1802*38fd1498Szrj
1803*38fd1498Szrj
1804*38fd1498Szrj /* This function is called with INSN that sets REG (of mode MODE) to
1805*38fd1498Szrj (SYM + OFF), but REG doesn't have known value (SYM + offset). This
1806*38fd1498Szrj function tries to find another register which is known to already have
1807*38fd1498Szrj value (SYM + offset) and change INSN into an add instruction
1808*38fd1498Szrj (set (REG) (plus (the found register) (OFF - offset))) if such
1809*38fd1498Szrj a register is found. It also updates the information about
1810*38fd1498Szrj REG's known value.
1811*38fd1498Szrj Return true iff we made a change. */
1812*38fd1498Szrj
static bool
move2add_use_add3_insn (scalar_int_mode mode, rtx reg, rtx sym, rtx off,
			rtx_insn *insn)
{
  rtx pat = PATTERN (insn);
  rtx src = SET_SRC (pat);
  int regno = REGNO (reg);
  int min_regno = 0;
  bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
  int i;
  bool changed = false;
  struct full_rtx_costs oldcst, newcst, mincst;
  rtx plus_expr;

  init_costs_to_max (&mincst);
  get_full_set_rtx_cost (pat, &oldcst);

  /* Temporarily replace the SET source with a (plus REG (const_int 0))
     skeleton; the loop below plugs each candidate delta into operand 1
     to cost the corresponding add.  The original source is restored
     after the scan.  */
  plus_expr = gen_rtx_PLUS (GET_MODE (reg), reg, const0_rtx);
  SET_SRC (pat) = plus_expr;

  /* Scan the hard registers for one known to hold SYM plus a constant,
     keeping the one that yields the cheapest add.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (move2add_valid_value_p (i, mode)
	&& reg_base_reg[i] < 0
	&& reg_symbol_ref[i] != NULL_RTX
	&& rtx_equal_p (sym, reg_symbol_ref[i]))
      {
	rtx new_src = gen_int_mode (UINTVAL (off) - reg_offset[i],
				    GET_MODE (reg));
	/* (set (reg) (plus (reg) (const_int 0))) is not canonical;
	   use (set (reg) (reg)) instead.
	   We don't delete this insn, nor do we convert it into a
	   note, to avoid losing register notes or the return
	   value flag.  jump2 already knows how to get rid of
	   no-op moves.  */
	if (new_src == const0_rtx)
	  {
	    /* Zero delta: a plain register copy, which nothing can
	       beat — stop searching.  Breaking here leaves I equal to
	       MIN_REGNO, which the code below uses to recognize this
	       case.  */
	    init_costs_to_zero (&mincst);
	    min_regno = i;
	    break;
	  }
	else
	  {
	    XEXP (plus_expr, 1) = new_src;
	    get_full_set_rtx_cost (pat, &newcst);

	    if (costs_lt_p (&newcst, &mincst, speed))
	      {
		mincst = newcst;
		min_regno = i;
	      }
	  }
      }
  SET_SRC (pat) = src;

  if (costs_lt_p (&mincst, &oldcst, speed))
    {
      rtx tem;

      tem = gen_rtx_REG (GET_MODE (reg), min_regno);
      /* I == MIN_REGNO only when the loop broke out early on a zero
	 delta; otherwise I is FIRST_PSEUDO_REGISTER and an explicit
	 add of the remaining offset is needed.  */
      if (i != min_regno)
	{
	  rtx new_src = gen_int_mode (UINTVAL (off) - reg_offset[min_regno],
				      GET_MODE (reg));
	  tem = gen_rtx_PLUS (GET_MODE (reg), tem, new_src);
	}
      if (validate_change (insn, &SET_SRC (pat), tem, 0))
	changed = true;
    }
  /* Whether or not the insn was changed, REG now holds SYM + OFF.  */
  reg_set_luid[regno] = move2add_luid;
  move2add_record_sym_value (reg, sym, off);
  return changed;
}
1885*38fd1498Szrj
1886*38fd1498Szrj /* Convert move insns with constant inputs to additions if they are cheaper.
1887*38fd1498Szrj Return true if any changes were made. */
static bool
reload_cse_move2add (rtx_insn *first)
{
  int i;
  rtx_insn *insn;
  bool changed = false;

  /* Start the scan with no known register contents.  */
  for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
    {
      reg_set_luid[i] = 0;
      reg_offset[i] = 0;
      reg_base_reg[i] = 0;
      reg_symbol_ref[i] = NULL_RTX;
      reg_mode[i] = VOIDmode;
    }

  move2add_last_label_luid = 0;
  move2add_luid = 2;
  for (insn = first; insn; insn = NEXT_INSN (insn), move2add_luid++)
    {
      rtx pat, note;

      if (LABEL_P (insn))
	{
	  move2add_last_label_luid = move2add_luid;
	  /* We're going to increment move2add_luid twice after a
	     label, so that we can use move2add_last_label_luid + 1 as
	     the luid for constants.  */
	  move2add_luid++;
	  continue;
	}
      if (! INSN_P (insn))
	continue;
      pat = PATTERN (insn);
      /* For simplicity, we only perform this optimization on
	 straightforward SETs.  */
      scalar_int_mode mode;
      if (GET_CODE (pat) == SET
	  && REG_P (SET_DEST (pat))
	  && is_a <scalar_int_mode> (GET_MODE (SET_DEST (pat)), &mode))
	{
	  rtx reg = SET_DEST (pat);
	  int regno = REGNO (reg);
	  rtx src = SET_SRC (pat);

	  /* Check if we have valid information on the contents of this
	     register in the mode of REG.  */
	  if (move2add_valid_value_p (regno, mode)
	      && dbg_cnt (cse2_move2add))
	    {
	      /* Try to transform (set (REGX) (CONST_INT A))
				  ...
				  (set (REGX) (CONST_INT B))
		 to
				  (set (REGX) (CONST_INT A))
				  ...
				  (set (REGX) (plus (REGX) (CONST_INT B-A)))
		 or
				  (set (REGX) (CONST_INT A))
				  ...
				  (set (STRICT_LOW_PART (REGX)) (CONST_INT B))
	      */

	      if (CONST_INT_P (src)
		  && reg_base_reg[regno] < 0
		  && reg_symbol_ref[regno] == NULL_RTX)
		{
		  changed |= move2add_use_add2_insn (mode, reg, NULL_RTX,
						     src, insn);
		  continue;
		}

	      /* Try to transform (set (REGX) (REGY))
				  (set (REGX) (PLUS (REGX) (CONST_INT A)))
				  ...
				  (set (REGX) (REGY))
				  (set (REGX) (PLUS (REGX) (CONST_INT B)))
		 to
				  (set (REGX) (REGY))
				  (set (REGX) (PLUS (REGX) (CONST_INT A)))
				  ...
				  (set (REGX) (plus (REGX) (CONST_INT B-A)))  */
	      else if (REG_P (src)
		       && reg_set_luid[regno] == reg_set_luid[REGNO (src)]
		       && reg_base_reg[regno] == reg_base_reg[REGNO (src)]
		       && move2add_valid_value_p (REGNO (src), mode))
		{
		  /* Look at the following insn for the PLUS that
		     completes the pattern above.  */
		  rtx_insn *next = next_nonnote_nondebug_insn (insn);
		  rtx set = NULL_RTX;
		  if (next)
		    set = single_set (next);
		  if (set
		      && SET_DEST (set) == reg
		      && GET_CODE (SET_SRC (set)) == PLUS
		      && XEXP (SET_SRC (set), 0) == reg
		      && CONST_INT_P (XEXP (SET_SRC (set), 1)))
		    {
		      rtx src3 = XEXP (SET_SRC (set), 1);
		      unsigned HOST_WIDE_INT added_offset = UINTVAL (src3);
		      HOST_WIDE_INT base_offset = reg_offset[REGNO (src)];
		      HOST_WIDE_INT regno_offset = reg_offset[regno];
		      /* The single add that replaces the copy+add pair:
			 new value minus REGX's current known value.  */
		      rtx new_src =
			gen_int_mode (added_offset
				      + base_offset
				      - regno_offset,
				      mode);
		      bool success = false;
		      bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));

		      if (new_src == const0_rtx)
			/* See above why we create (set (reg) (reg)) here.  */
			success
			  = validate_change (next, &SET_SRC (set), reg, 0);
		      else
			{
			  /* Cost the replacement add against the original
			     pair (hence the extra insn added to OLDCST).  */
			  rtx old_src = SET_SRC (set);
			  struct full_rtx_costs oldcst, newcst;
			  rtx tem = gen_rtx_PLUS (mode, reg, new_src);

			  get_full_set_rtx_cost (set, &oldcst);
			  SET_SRC (set) = tem;
			  get_full_set_src_cost (tem, mode, &newcst);
			  SET_SRC (set) = old_src;
			  costs_add_n_insns (&oldcst, 1);

			  if (costs_lt_p (&newcst, &oldcst, speed)
			      && have_add2_insn (reg, new_src))
			    {
			      rtx newpat = gen_rtx_SET (reg, tem);
			      success
				= validate_change (next, &PATTERN (next),
						   newpat, 0);
			    }
			}
		      /* On success the copy insn is now dead; NEXT does
			 all the work.  Either way, record REGX's new
			 contents and resume scanning after NEXT.  */
		      if (success)
			delete_insn (insn);
		      changed |= success;
		      insn = next;
		      move2add_record_mode (reg);
		      reg_offset[regno]
			= trunc_int_for_mode (added_offset + base_offset,
					      mode);
		      continue;
		    }
		}
	    }

	  /* Try to transform
	     (set (REGX) (CONST (PLUS (SYMBOL_REF) (CONST_INT A))))
	     ...
	     (set (REGY) (CONST (PLUS (SYMBOL_REF) (CONST_INT B))))
	     to
	     (set (REGX) (CONST (PLUS (SYMBOL_REF) (CONST_INT A))))
	     ...
	     (set (REGY) (CONST (PLUS (REGX) (CONST_INT B-A))))  */
	  if ((GET_CODE (src) == SYMBOL_REF
	       || (GET_CODE (src) == CONST
		   && GET_CODE (XEXP (src, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (src, 0), 0)) == SYMBOL_REF
		   && CONST_INT_P (XEXP (XEXP (src, 0), 1))))
	      && dbg_cnt (cse2_move2add))
	    {
	      rtx sym, off;

	      /* Split SRC into its symbol and constant parts.  */
	      if (GET_CODE (src) == SYMBOL_REF)
		{
		  sym = src;
		  off = const0_rtx;
		}
	      else
		{
		  sym = XEXP (XEXP (src, 0), 0);
		  off = XEXP (XEXP (src, 0), 1);
		}

	      /* If the reg already contains the value which is sum of
		 sym and some constant value, we can use an add2 insn.  */
	      if (move2add_valid_value_p (regno, mode)
		  && reg_base_reg[regno] < 0
		  && reg_symbol_ref[regno] != NULL_RTX
		  && rtx_equal_p (sym, reg_symbol_ref[regno]))
		changed |= move2add_use_add2_insn (mode, reg, sym, off, insn);

	      /* Otherwise, we have to find a register whose value is sum
		 of sym and some constant value.  */
	      else
		changed |= move2add_use_add3_insn (mode, reg, sym, off, insn);

	      continue;
	    }
	}

      /* Auto-increment addressing changes a register without a SET;
	 invalidate anything known about registers with a REG_INC note.  */
      for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
	{
	  if (REG_NOTE_KIND (note) == REG_INC
	      && REG_P (XEXP (note, 0)))
	    {
	      /* Reset the information about this register.  */
	      int regno = REGNO (XEXP (note, 0));
	      if (regno < FIRST_PSEUDO_REGISTER)
		{
		  move2add_record_mode (XEXP (note, 0));
		  reg_mode[regno] = VOIDmode;
		}
	    }
	}
      /* Record (or invalidate) every register this insn stores to.  */
      note_stores (PATTERN (insn), move2add_note_store, insn);

      /* If INSN is a conditional branch, we try to extract an
	 implicit set out of it.  */
      if (any_condjump_p (insn))
	{
	  rtx cnd = fis_get_condition (insn);

	  if (cnd != NULL_RTX
	      && GET_CODE (cnd) == NE
	      && REG_P (XEXP (cnd, 0))
	      && !reg_set_p (XEXP (cnd, 0), insn)
	      /* The following two checks, which are also in
		 move2add_note_store, are intended to reduce the
		 number of calls to gen_rtx_SET to avoid memory
		 allocation if possible.  */
	      && SCALAR_INT_MODE_P (GET_MODE (XEXP (cnd, 0)))
	      && REG_NREGS (XEXP (cnd, 0)) == 1
	      && CONST_INT_P (XEXP (cnd, 1)))
	    {
	      /* On the fall-through edge of (ne REG CONST) the register
		 is known to equal CONST; record it as if it were set.  */
	      rtx implicit_set =
		gen_rtx_SET (XEXP (cnd, 0), XEXP (cnd, 1));
	      move2add_note_store (SET_DEST (implicit_set), implicit_set, insn);
	    }
	}

      /* If this is a CALL_INSN, all call used registers are stored with
	 unknown values.  */
      if (CALL_P (insn))
	{
	  rtx link;

	  for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
	    {
	      if (call_used_regs[i])
		/* Reset the information about this register.  */
		reg_mode[i] = VOIDmode;
	    }

	  /* Also invalidate registers the call explicitly clobbers
	     beyond the default call-used set.  */
	  for (link = CALL_INSN_FUNCTION_USAGE (insn); link;
	       link = XEXP (link, 1))
	    {
	      rtx setuse = XEXP (link, 0);
	      rtx usage_rtx = XEXP (setuse, 0);
	      if (GET_CODE (setuse) == CLOBBER
		  && REG_P (usage_rtx))
		{
		  unsigned int end_regno = END_REGNO (usage_rtx);
		  for (unsigned int r = REGNO (usage_rtx); r < end_regno; ++r)
		    /* Reset the information about this register.  */
		    reg_mode[r] = VOIDmode;
		}
	    }
	}
    }
  return changed;
}
2151*38fd1498Szrj
2152*38fd1498Szrj /* SET is a SET or CLOBBER that sets DST. DATA is the insn which
2153*38fd1498Szrj contains SET.
2154*38fd1498Szrj Update reg_set_luid, reg_offset and reg_base_reg accordingly.
2155*38fd1498Szrj Called from reload_cse_move2add via note_stores. */
2156*38fd1498Szrj
static void
move2add_note_store (rtx dst, const_rtx set, void *data)
{
  /* DATA is the insn containing SET, as passed by note_stores.  */
  rtx_insn *insn = (rtx_insn *) data;
  unsigned int regno = 0;
  scalar_int_mode mode;

  /* Some targets do argument pushes without adding REG_INC notes.  */

  if (MEM_P (dst))
    {
      /* The store itself does not affect tracked registers, but an
	 auto-modified address register gets a new, unknown value:
	 forget everything recorded for it.  */
      dst = XEXP (dst, 0);
      if (GET_CODE (dst) == PRE_INC || GET_CODE (dst) == POST_INC
	  || GET_CODE (dst) == PRE_DEC || GET_CODE (dst) == POST_DEC)
	reg_mode[REGNO (XEXP (dst, 0))] = VOIDmode;
      return;
    }

  /* Identify the (hard) register being set; anything other than a REG
     or SUBREG-of-REG destination is not tracked at all.  */
  if (GET_CODE (dst) == SUBREG)
    regno = subreg_regno (dst);
  else if (REG_P (dst))
    regno = REGNO (dst);
  else
    return;

  /* Only scalar integer destinations can be tracked as reg+offset or
     symbol+offset values.  */
  if (!is_a <scalar_int_mode> (GET_MODE (dst), &mode))
    goto invalidate;

  if (GET_CODE (set) == SET)
    {
      rtx note, sym = NULL_RTX;
      rtx off;

      /* A REG_EQUAL/REG_EQUIV note of the form SYMBOL_REF or
	 (const (plus SYMBOL_REF CONST_INT)) lets us record DST as a
	 symbolic value plus offset.  */
      note = find_reg_equal_equiv_note (insn);
      if (note && GET_CODE (XEXP (note, 0)) == SYMBOL_REF)
	{
	  sym = XEXP (note, 0);
	  off = const0_rtx;
	}
      else if (note && GET_CODE (XEXP (note, 0)) == CONST
	       && GET_CODE (XEXP (XEXP (note, 0), 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (XEXP (note, 0), 0), 0)) == SYMBOL_REF
	       && CONST_INT_P (XEXP (XEXP (XEXP (note, 0), 0), 1)))
	{
	  sym = XEXP (XEXP (XEXP (note, 0), 0), 0);
	  off = XEXP (XEXP (XEXP (note, 0), 0), 1);
	}

      if (sym != NULL_RTX)
	{
	  move2add_record_sym_value (dst, sym, off);
	  return;
	}
    }

  /* A full-register SET (no ZERO_EXTRACT / STRICT_LOW_PART partial
     store) may be trackable, depending on the source.  */
  if (GET_CODE (set) == SET
      && GET_CODE (SET_DEST (set)) != ZERO_EXTRACT
      && GET_CODE (SET_DEST (set)) != STRICT_LOW_PART)
    {
      rtx src = SET_SRC (set);
      rtx base_reg;
      unsigned HOST_WIDE_INT offset;
      int base_regno;

      switch (GET_CODE (src))
	{
	case PLUS:
	  if (REG_P (XEXP (src, 0)))
	    {
	      base_reg = XEXP (src, 0);

	      if (CONST_INT_P (XEXP (src, 1)))
		/* reg + const: record relative to the base register.  */
		offset = UINTVAL (XEXP (src, 1));
	      else if (REG_P (XEXP (src, 1))
		       && move2add_valid_value_p (REGNO (XEXP (src, 1)), mode))
		{
		  /* reg + reg: usable if either operand is known to hold
		     a plain constant (no base register, no symbol); the
		     other operand then serves as the base.  */
		  if (reg_base_reg[REGNO (XEXP (src, 1))] < 0
		      && reg_symbol_ref[REGNO (XEXP (src, 1))] == NULL_RTX)
		    offset = reg_offset[REGNO (XEXP (src, 1))];
		  /* Maybe the first register is known to be a
		     constant.  */
		  else if (move2add_valid_value_p (REGNO (base_reg), mode)
			   && reg_base_reg[REGNO (base_reg)] < 0
			   && reg_symbol_ref[REGNO (base_reg)] == NULL_RTX)
		    {
		      offset = reg_offset[REGNO (base_reg)];
		      base_reg = XEXP (src, 1);
		    }
		  else
		    goto invalidate;
		}
	      else
		goto invalidate;

	      break;
	    }

	  goto invalidate;

	case REG:
	  /* Plain register copy: same base, zero offset.  */
	  base_reg = src;
	  offset = 0;
	  break;

	case CONST_INT:
	  /* Start tracking the register as a constant.  */
	  reg_base_reg[regno] = -1;
	  reg_symbol_ref[regno] = NULL_RTX;
	  reg_offset[regno] = INTVAL (SET_SRC (set));
	  /* We assign the same luid to all registers set to constants.  */
	  reg_set_luid[regno] = move2add_last_label_luid + 1;
	  move2add_record_mode (dst);
	  return;

	default:
	  goto invalidate;
	}

      base_regno = REGNO (base_reg);
      /* If information about the base register is not valid, set it
	 up as a new base register, pretending its value is known
	 starting from the current insn.  */
      if (!move2add_valid_value_p (base_regno, mode))
	{
	  reg_base_reg[base_regno] = base_regno;
	  reg_symbol_ref[base_regno] = NULL_RTX;
	  reg_offset[base_regno] = 0;
	  reg_set_luid[base_regno] = move2add_luid;
	  gcc_assert (GET_MODE (base_reg) == mode);
	  move2add_record_mode (base_reg);
	}

      /* Copy base information from our base register.  */
      reg_set_luid[regno] = reg_set_luid[base_regno];
      reg_base_reg[regno] = reg_base_reg[base_regno];
      reg_symbol_ref[regno] = reg_symbol_ref[base_regno];

      /* Compute the sum of the offsets or constants.  */
      reg_offset[regno]
	= trunc_int_for_mode (offset + reg_offset[base_regno], mode);

      move2add_record_mode (dst);
    }
  else
    {
    invalidate:
      /* Invalidate the contents of the register.  */
      move2add_record_mode (dst);
      reg_mode[regno] = VOIDmode;
    }
}
2308*38fd1498Szrj
namespace {

/* Pass descriptor for the post-reload CSE pass.  */

const pass_data pass_data_postreload_cse =
{
  RTL_PASS, /* type */
  "postreload", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_RELOAD_CSE_REGS, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

/* RTL pass that runs a simple CSE over hard registers after reload.  */

class pass_postreload_cse : public rtl_opt_pass
{
public:
  pass_postreload_cse (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_postreload_cse, ctxt)
  {}

  /* opt_pass methods: */
  /* Only run when optimizing and after reload has completed.  */
  virtual bool gate (function *) { return (optimize > 0 && reload_completed); }

  virtual unsigned int execute (function *);

}; // class pass_postreload_cse

unsigned int
pass_postreload_cse::execute (function *fun)
{
  /* Allow the pass to be skipped via the debug counter.  */
  if (!dbg_cnt (postreload_cse))
    return 0;

  /* Do a very simple CSE pass over just the hard registers.  */
  reload_cse_regs (get_insns ());
  /* Reload_cse_regs can eliminate potentially-trapping MEMs.
     Remove any EH edges associated with them.  */
  if (fun->can_throw_non_call_exceptions
      && purge_all_dead_edges ())
    cleanup_cfg (0);

  return 0;
}

} // anon namespace
2356*38fd1498Szrj
2357*38fd1498Szrj rtl_opt_pass *
make_pass_postreload_cse(gcc::context * ctxt)2358*38fd1498Szrj make_pass_postreload_cse (gcc::context *ctxt)
2359*38fd1498Szrj {
2360*38fd1498Szrj return new pass_postreload_cse (ctxt);
2361*38fd1498Szrj }
2362