1 /* Perform simple optimizations to clean up the result of reload.
2 Copyright (C) 1987-2018 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "target.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "predict.h"
28 #include "df.h"
29 #include "memmodel.h"
30 #include "tm_p.h"
31 #include "optabs.h"
32 #include "regs.h"
33 #include "emit-rtl.h"
34 #include "recog.h"
35
36 #include "cfgrtl.h"
37 #include "cfgbuild.h"
38 #include "cfgcleanup.h"
39 #include "reload.h"
40 #include "cselib.h"
41 #include "tree-pass.h"
42 #include "dbgcnt.h"
43
44 static int reload_cse_noop_set_p (rtx);
45 static bool reload_cse_simplify (rtx_insn *, rtx);
46 static void reload_cse_regs_1 (void);
47 static int reload_cse_simplify_set (rtx, rtx_insn *);
48 static int reload_cse_simplify_operands (rtx_insn *, rtx);
49
50 static void reload_combine (void);
51 static void reload_combine_note_use (rtx *, rtx_insn *, int, rtx);
52 static void reload_combine_note_store (rtx, const_rtx, void *);
53
54 static bool reload_cse_move2add (rtx_insn *);
55 static void move2add_note_store (rtx, const_rtx, void *);
56
57 /* Call cse / combine like post-reload optimization phases.
58 FIRST is the first instruction. */
59
60 static void
reload_cse_regs(rtx_insn * first ATTRIBUTE_UNUSED)61 reload_cse_regs (rtx_insn *first ATTRIBUTE_UNUSED)
62 {
63 bool moves_converted;
64 reload_cse_regs_1 ();
65 reload_combine ();
66 moves_converted = reload_cse_move2add (first);
67 if (flag_expensive_optimizations)
68 {
69 if (moves_converted)
70 reload_combine ();
71 reload_cse_regs_1 ();
72 }
73 }
74
75 /* See whether a single set SET is a noop. */
76 static int
reload_cse_noop_set_p(rtx set)77 reload_cse_noop_set_p (rtx set)
78 {
79 if (cselib_reg_set_mode (SET_DEST (set)) != GET_MODE (SET_DEST (set)))
80 return 0;
81
82 return rtx_equal_for_cselib_p (SET_DEST (set), SET_SRC (set));
83 }
84
85 /* Try to simplify INSN. Return true if the CFG may have changed. */
86 static bool
reload_cse_simplify(rtx_insn * insn,rtx testreg)87 reload_cse_simplify (rtx_insn *insn, rtx testreg)
88 {
89 rtx body = PATTERN (insn);
90 basic_block insn_bb = BLOCK_FOR_INSN (insn);
91 unsigned insn_bb_succs = EDGE_COUNT (insn_bb->succs);
92
93 /* If NO_FUNCTION_CSE has been set by the target, then we should not try
94 to cse function calls. */
95 if (NO_FUNCTION_CSE && CALL_P (insn))
96 return false;
97
98 if (GET_CODE (body) == SET)
99 {
100 int count = 0;
101
102 /* Simplify even if we may think it is a no-op.
103 We may think a memory load of a value smaller than WORD_SIZE
104 is redundant because we haven't taken into account possible
105 implicit extension. reload_cse_simplify_set() will bring
106 this out, so it's safer to simplify before we delete. */
107 count += reload_cse_simplify_set (body, insn);
108
109 if (!count && reload_cse_noop_set_p (body))
110 {
111 if (check_for_inc_dec (insn))
112 delete_insn_and_edges (insn);
113 /* We're done with this insn. */
114 goto done;
115 }
116
117 if (count > 0)
118 apply_change_group ();
119 else
120 reload_cse_simplify_operands (insn, testreg);
121 }
122 else if (GET_CODE (body) == PARALLEL)
123 {
124 int i;
125 int count = 0;
126 rtx value = NULL_RTX;
127
128 /* Registers mentioned in the clobber list for an asm cannot be reused
129 within the body of the asm. Invalidate those registers now so that
130 we don't try to substitute values for them. */
131 if (asm_noperands (body) >= 0)
132 {
133 for (i = XVECLEN (body, 0) - 1; i >= 0; --i)
134 {
135 rtx part = XVECEXP (body, 0, i);
136 if (GET_CODE (part) == CLOBBER && REG_P (XEXP (part, 0)))
137 cselib_invalidate_rtx (XEXP (part, 0));
138 }
139 }
140
141 /* If every action in a PARALLEL is a noop, we can delete
142 the entire PARALLEL. */
143 for (i = XVECLEN (body, 0) - 1; i >= 0; --i)
144 {
145 rtx part = XVECEXP (body, 0, i);
146 if (GET_CODE (part) == SET)
147 {
148 if (! reload_cse_noop_set_p (part))
149 break;
150 if (REG_P (SET_DEST (part))
151 && REG_FUNCTION_VALUE_P (SET_DEST (part)))
152 {
153 if (value)
154 break;
155 value = SET_DEST (part);
156 }
157 }
158 else if (GET_CODE (part) != CLOBBER
159 && GET_CODE (part) != USE)
160 break;
161 }
162
163 if (i < 0)
164 {
165 if (check_for_inc_dec (insn))
166 delete_insn_and_edges (insn);
167 /* We're done with this insn. */
168 goto done;
169 }
170
171 /* It's not a no-op, but we can try to simplify it. */
172 for (i = XVECLEN (body, 0) - 1; i >= 0; --i)
173 if (GET_CODE (XVECEXP (body, 0, i)) == SET)
174 count += reload_cse_simplify_set (XVECEXP (body, 0, i), insn);
175
176 if (count > 0)
177 apply_change_group ();
178 else
179 reload_cse_simplify_operands (insn, testreg);
180 }
181
182 done:
183 return (EDGE_COUNT (insn_bb->succs) != insn_bb_succs);
184 }
185
186 /* Do a very simple CSE pass over the hard registers.
187
188 This function detects no-op moves where we happened to assign two
189 different pseudo-registers to the same hard register, and then
190 copied one to the other. Reload will generate a useless
191 instruction copying a register to itself.
192
193 This function also detects cases where we load a value from memory
194 into two different registers, and (if memory is more expensive than
195 registers) changes it to simply copy the first register into the
196 second register.
197
198 Another optimization is performed that scans the operands of each
199 instruction to see whether the value is already available in a
200 hard register. It then replaces the operand with the hard register
201 if possible, much like an optional reload would. */
202
203 static void
reload_cse_regs_1(void)204 reload_cse_regs_1 (void)
205 {
206 bool cfg_changed = false;
207 basic_block bb;
208 rtx_insn *insn;
209 rtx testreg = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
210
211 cselib_init (CSELIB_RECORD_MEMORY);
212 init_alias_analysis ();
213
214 FOR_EACH_BB_FN (bb, cfun)
215 FOR_BB_INSNS (bb, insn)
216 {
217 if (INSN_P (insn))
218 cfg_changed |= reload_cse_simplify (insn, testreg);
219
220 cselib_process_insn (insn);
221 }
222
223 /* Clean up. */
224 end_alias_analysis ();
225 cselib_finish ();
226 if (cfg_changed)
227 cleanup_cfg (0);
228 }
229
230 /* Try to simplify a single SET instruction. SET is the set pattern.
231 INSN is the instruction it came from.
232 This function only handles one case: if we set a register to a value
233 which is not a register, we try to find that value in some other register
234 and change the set into a register copy. */
235
236 static int
reload_cse_simplify_set(rtx set,rtx_insn * insn)237 reload_cse_simplify_set (rtx set, rtx_insn *insn)
238 {
239 int did_change = 0;
240 int dreg;
241 rtx src;
242 reg_class_t dclass;
243 int old_cost;
244 cselib_val *val;
245 struct elt_loc_list *l;
246 enum rtx_code extend_op = UNKNOWN;
247 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
248
249 dreg = true_regnum (SET_DEST (set));
250 if (dreg < 0)
251 return 0;
252
253 src = SET_SRC (set);
254 if (side_effects_p (src) || true_regnum (src) >= 0)
255 return 0;
256
257 dclass = REGNO_REG_CLASS (dreg);
258
259 /* When replacing a memory with a register, we need to honor assumptions
260 that combine made wrt the contents of sign bits. We'll do this by
261 generating an extend instruction instead of a reg->reg copy. Thus
262 the destination must be a register that we can widen. */
263 if (MEM_P (src)
264 && (extend_op = load_extend_op (GET_MODE (src))) != UNKNOWN
265 && !REG_P (SET_DEST (set)))
266 return 0;
267
268 val = cselib_lookup (src, GET_MODE (SET_DEST (set)), 0, VOIDmode);
269 if (! val)
270 return 0;
271
272 /* If memory loads are cheaper than register copies, don't change them. */
273 if (MEM_P (src))
274 old_cost = memory_move_cost (GET_MODE (src), dclass, true);
275 else if (REG_P (src))
276 old_cost = register_move_cost (GET_MODE (src),
277 REGNO_REG_CLASS (REGNO (src)), dclass);
278 else
279 old_cost = set_src_cost (src, GET_MODE (SET_DEST (set)), speed);
280
281 for (l = val->locs; l; l = l->next)
282 {
283 rtx this_rtx = l->loc;
284 int this_cost;
285
286 if (CONSTANT_P (this_rtx) && ! references_value_p (this_rtx, 0))
287 {
288 if (extend_op != UNKNOWN)
289 {
290 wide_int result;
291
292 if (!CONST_SCALAR_INT_P (this_rtx))
293 continue;
294
295 switch (extend_op)
296 {
297 case ZERO_EXTEND:
298 result = wide_int::from (rtx_mode_t (this_rtx,
299 GET_MODE (src)),
300 BITS_PER_WORD, UNSIGNED);
301 break;
302 case SIGN_EXTEND:
303 result = wide_int::from (rtx_mode_t (this_rtx,
304 GET_MODE (src)),
305 BITS_PER_WORD, SIGNED);
306 break;
307 default:
308 gcc_unreachable ();
309 }
310 this_rtx = immed_wide_int_const (result, word_mode);
311 }
312
313 this_cost = set_src_cost (this_rtx, GET_MODE (SET_DEST (set)), speed);
314 }
315 else if (REG_P (this_rtx))
316 {
317 if (extend_op != UNKNOWN)
318 {
319 this_rtx = gen_rtx_fmt_e (extend_op, word_mode, this_rtx);
320 this_cost = set_src_cost (this_rtx, word_mode, speed);
321 }
322 else
323 this_cost = register_move_cost (GET_MODE (this_rtx),
324 REGNO_REG_CLASS (REGNO (this_rtx)),
325 dclass);
326 }
327 else
328 continue;
329
330 /* If equal costs, prefer registers over anything else. That
331 tends to lead to smaller instructions on some machines. */
332 if (this_cost < old_cost
333 || (this_cost == old_cost
334 && REG_P (this_rtx)
335 && !REG_P (SET_SRC (set))))
336 {
337 if (extend_op != UNKNOWN
338 && REG_CAN_CHANGE_MODE_P (REGNO (SET_DEST (set)),
339 GET_MODE (SET_DEST (set)), word_mode))
340 {
341 rtx wide_dest = gen_rtx_REG (word_mode, REGNO (SET_DEST (set)));
342 ORIGINAL_REGNO (wide_dest) = ORIGINAL_REGNO (SET_DEST (set));
343 validate_change (insn, &SET_DEST (set), wide_dest, 1);
344 }
345
346 validate_unshare_change (insn, &SET_SRC (set), this_rtx, 1);
347 old_cost = this_cost, did_change = 1;
348 }
349 }
350
351 return did_change;
352 }
353
354 /* Try to replace operands in INSN with equivalent values that are already
355 in registers. This can be viewed as optional reloading.
356
357 For each non-register operand in the insn, see if any hard regs are
358 known to be equivalent to that operand. Record the alternatives which
359 can accept these hard registers. Among all alternatives, select the
360 ones which are better or equal to the one currently matching, where
361 "better" is in terms of '?' and '!' constraints. Among the remaining
362 alternatives, select the one which replaces most operands with
363 hard registers. */
364
365 static int
reload_cse_simplify_operands(rtx_insn * insn,rtx testreg)366 reload_cse_simplify_operands (rtx_insn *insn, rtx testreg)
367 {
368 int i, j;
369
370 /* For each operand, all registers that are equivalent to it. */
371 HARD_REG_SET equiv_regs[MAX_RECOG_OPERANDS];
372
373 const char *constraints[MAX_RECOG_OPERANDS];
374
375 /* Vector recording how bad an alternative is. */
376 int *alternative_reject;
377 /* Vector recording how many registers can be introduced by choosing
378 this alternative. */
379 int *alternative_nregs;
380 /* Array of vectors recording, for each operand and each alternative,
381 which hard register to substitute, or -1 if the operand should be
382 left as it is. */
383 int *op_alt_regno[MAX_RECOG_OPERANDS];
384 /* Array of alternatives, sorted in order of decreasing desirability. */
385 int *alternative_order;
386
387 extract_constrain_insn (insn);
388
389 if (recog_data.n_alternatives == 0 || recog_data.n_operands == 0)
390 return 0;
391
392 alternative_reject = XALLOCAVEC (int, recog_data.n_alternatives);
393 alternative_nregs = XALLOCAVEC (int, recog_data.n_alternatives);
394 alternative_order = XALLOCAVEC (int, recog_data.n_alternatives);
395 memset (alternative_reject, 0, recog_data.n_alternatives * sizeof (int));
396 memset (alternative_nregs, 0, recog_data.n_alternatives * sizeof (int));
397
398 /* For each operand, find out which regs are equivalent. */
399 for (i = 0; i < recog_data.n_operands; i++)
400 {
401 cselib_val *v;
402 struct elt_loc_list *l;
403 rtx op;
404
405 CLEAR_HARD_REG_SET (equiv_regs[i]);
406
407 /* cselib blows up on CODE_LABELs. Trying to fix that doesn't seem
408 right, so avoid the problem here. Similarly NOTE_INSN_DELETED_LABEL.
409 Likewise if we have a constant and the insn pattern doesn't tell us
410 the mode we need. */
411 if (LABEL_P (recog_data.operand[i])
412 || (NOTE_P (recog_data.operand[i])
413 && NOTE_KIND (recog_data.operand[i]) == NOTE_INSN_DELETED_LABEL)
414 || (CONSTANT_P (recog_data.operand[i])
415 && recog_data.operand_mode[i] == VOIDmode))
416 continue;
417
418 op = recog_data.operand[i];
419 if (MEM_P (op) && load_extend_op (GET_MODE (op)) != UNKNOWN)
420 {
421 rtx set = single_set (insn);
422
423 /* We might have multiple sets, some of which do implicit
424 extension. Punt on this for now. */
425 if (! set)
426 continue;
427 /* If the destination is also a MEM or a STRICT_LOW_PART, no
428 extension applies.
429 Also, if there is an explicit extension, we don't have to
430 worry about an implicit one. */
431 else if (MEM_P (SET_DEST (set))
432 || GET_CODE (SET_DEST (set)) == STRICT_LOW_PART
433 || GET_CODE (SET_SRC (set)) == ZERO_EXTEND
434 || GET_CODE (SET_SRC (set)) == SIGN_EXTEND)
435 ; /* Continue ordinary processing. */
436 /* If the register cannot change mode to word_mode, it follows that
437 it cannot have been used in word_mode. */
438 else if (REG_P (SET_DEST (set))
439 && !REG_CAN_CHANGE_MODE_P (REGNO (SET_DEST (set)),
440 GET_MODE (SET_DEST (set)),
441 word_mode))
442 ; /* Continue ordinary processing. */
443 /* If this is a straight load, make the extension explicit. */
444 else if (REG_P (SET_DEST (set))
445 && recog_data.n_operands == 2
446 && SET_SRC (set) == op
447 && SET_DEST (set) == recog_data.operand[1-i])
448 {
449 validate_change (insn, recog_data.operand_loc[i],
450 gen_rtx_fmt_e (load_extend_op (GET_MODE (op)),
451 word_mode, op),
452 1);
453 validate_change (insn, recog_data.operand_loc[1-i],
454 gen_rtx_REG (word_mode, REGNO (SET_DEST (set))),
455 1);
456 if (! apply_change_group ())
457 return 0;
458 return reload_cse_simplify_operands (insn, testreg);
459 }
460 else
461 /* ??? There might be arithmetic operations with memory that are
462 safe to optimize, but is it worth the trouble? */
463 continue;
464 }
465
466 if (side_effects_p (op))
467 continue;
468 v = cselib_lookup (op, recog_data.operand_mode[i], 0, VOIDmode);
469 if (! v)
470 continue;
471
472 for (l = v->locs; l; l = l->next)
473 if (REG_P (l->loc))
474 SET_HARD_REG_BIT (equiv_regs[i], REGNO (l->loc));
475 }
476
477 alternative_mask preferred = get_preferred_alternatives (insn);
478 for (i = 0; i < recog_data.n_operands; i++)
479 {
480 machine_mode mode;
481 int regno;
482 const char *p;
483
484 op_alt_regno[i] = XALLOCAVEC (int, recog_data.n_alternatives);
485 for (j = 0; j < recog_data.n_alternatives; j++)
486 op_alt_regno[i][j] = -1;
487
488 p = constraints[i] = recog_data.constraints[i];
489 mode = recog_data.operand_mode[i];
490
491 /* Add the reject values for each alternative given by the constraints
492 for this operand. */
493 j = 0;
494 while (*p != '\0')
495 {
496 char c = *p++;
497 if (c == ',')
498 j++;
499 else if (c == '?')
500 alternative_reject[j] += 3;
501 else if (c == '!')
502 alternative_reject[j] += 300;
503 }
504
505 /* We won't change operands which are already registers. We
506 also don't want to modify output operands. */
507 regno = true_regnum (recog_data.operand[i]);
508 if (regno >= 0
509 || constraints[i][0] == '='
510 || constraints[i][0] == '+')
511 continue;
512
513 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
514 {
515 enum reg_class rclass = NO_REGS;
516
517 if (! TEST_HARD_REG_BIT (equiv_regs[i], regno))
518 continue;
519
520 set_mode_and_regno (testreg, mode, regno);
521
522 /* We found a register equal to this operand. Now look for all
523 alternatives that can accept this register and have not been
524 assigned a register they can use yet. */
525 j = 0;
526 p = constraints[i];
527 for (;;)
528 {
529 char c = *p;
530
531 switch (c)
532 {
533 case 'g':
534 rclass = reg_class_subunion[rclass][GENERAL_REGS];
535 break;
536
537 default:
538 rclass
539 = (reg_class_subunion
540 [rclass]
541 [reg_class_for_constraint (lookup_constraint (p))]);
542 break;
543
544 case ',': case '\0':
545 /* See if REGNO fits this alternative, and set it up as the
546 replacement register if we don't have one for this
547 alternative yet and the operand being replaced is not
548 a cheap CONST_INT. */
549 if (op_alt_regno[i][j] == -1
550 && TEST_BIT (preferred, j)
551 && reg_fits_class_p (testreg, rclass, 0, mode)
552 && (!CONST_INT_P (recog_data.operand[i])
553 || (set_src_cost (recog_data.operand[i], mode,
554 optimize_bb_for_speed_p
555 (BLOCK_FOR_INSN (insn)))
556 > set_src_cost (testreg, mode,
557 optimize_bb_for_speed_p
558 (BLOCK_FOR_INSN (insn))))))
559 {
560 alternative_nregs[j]++;
561 op_alt_regno[i][j] = regno;
562 }
563 j++;
564 rclass = NO_REGS;
565 break;
566 }
567 p += CONSTRAINT_LEN (c, p);
568
569 if (c == '\0')
570 break;
571 }
572 }
573 }
574
575 /* Record all alternatives which are better or equal to the currently
576 matching one in the alternative_order array. */
577 for (i = j = 0; i < recog_data.n_alternatives; i++)
578 if (alternative_reject[i] <= alternative_reject[which_alternative])
579 alternative_order[j++] = i;
580 recog_data.n_alternatives = j;
581
582 /* Sort it. Given a small number of alternatives, a dumb algorithm
583 won't hurt too much. */
584 for (i = 0; i < recog_data.n_alternatives - 1; i++)
585 {
586 int best = i;
587 int best_reject = alternative_reject[alternative_order[i]];
588 int best_nregs = alternative_nregs[alternative_order[i]];
589
590 for (j = i + 1; j < recog_data.n_alternatives; j++)
591 {
592 int this_reject = alternative_reject[alternative_order[j]];
593 int this_nregs = alternative_nregs[alternative_order[j]];
594
595 if (this_reject < best_reject
596 || (this_reject == best_reject && this_nregs > best_nregs))
597 {
598 best = j;
599 best_reject = this_reject;
600 best_nregs = this_nregs;
601 }
602 }
603
604 std::swap (alternative_order[best], alternative_order[i]);
605 }
606
607 /* Substitute the operands as determined by op_alt_regno for the best
608 alternative. */
609 j = alternative_order[0];
610
611 for (i = 0; i < recog_data.n_operands; i++)
612 {
613 machine_mode mode = recog_data.operand_mode[i];
614 if (op_alt_regno[i][j] == -1)
615 continue;
616
617 validate_change (insn, recog_data.operand_loc[i],
618 gen_rtx_REG (mode, op_alt_regno[i][j]), 1);
619 }
620
621 for (i = recog_data.n_dups - 1; i >= 0; i--)
622 {
623 int op = recog_data.dup_num[i];
624 machine_mode mode = recog_data.operand_mode[op];
625
626 if (op_alt_regno[op][j] == -1)
627 continue;
628
629 validate_change (insn, recog_data.dup_loc[i],
630 gen_rtx_REG (mode, op_alt_regno[op][j]), 1);
631 }
632
633 return apply_change_group ();
634 }
635
636 /* If reload couldn't use reg+reg+offset addressing, try to use reg+reg
637 addressing now.
638 This code might also be useful when reload gave up on reg+reg addressing
639 because of clashes between the return register and INDEX_REG_CLASS. */
640
641 /* The maximum number of uses of a register we can keep track of to
642 replace them with reg+reg addressing. */
643 #define RELOAD_COMBINE_MAX_USES 16
644
645 /* Describes a recorded use of a register. */
646 struct reg_use
647 {
648 /* The insn where a register has been used. */
649 rtx_insn *insn;
650 /* Points to the memory reference enclosing the use, if any, NULL_RTX
651 otherwise. */
652 rtx containing_mem;
653 /* Location of the register within INSN. */
654 rtx *usep;
655 /* The reverse uid of the insn. */
656 int ruid;
657 };
658
659 /* If the register is used in some unknown fashion, USE_INDEX is negative.
660 If it is dead, USE_INDEX is RELOAD_COMBINE_MAX_USES, and STORE_RUID
661 indicates where it is first set or clobbered.
662 Otherwise, USE_INDEX is the index of the last encountered use of the
663 register (which is first among these we have seen since we scan backwards).
664 USE_RUID indicates the first encountered, i.e. last, of these uses.
665 If ALL_OFFSETS_MATCH is true, all encountered uses were inside a PLUS
666 with a constant offset; OFFSET contains this constant in that case.
667 STORE_RUID is always meaningful if we only want to use a value in a
668 register in a different place: it denotes the next insn in the insn
669 stream (i.e. the last encountered) that sets or clobbers the register.
670 REAL_STORE_RUID is similar, but clobbers are ignored when updating it. */
671 static struct
672 {
673 struct reg_use reg_use[RELOAD_COMBINE_MAX_USES];
674 rtx offset;
675 int use_index;
676 int store_ruid;
677 int real_store_ruid;
678 int use_ruid;
679 bool all_offsets_match;
680 } reg_state[FIRST_PSEUDO_REGISTER];
681
682 /* Reverse linear uid. This is increased in reload_combine while scanning
683 the instructions from last to first. It is used to set last_label_ruid
684 and the store_ruid / use_ruid fields in reg_state. */
685 static int reload_combine_ruid;
686
687 /* The RUID of the last label we encountered in reload_combine. */
688 static int last_label_ruid;
689
690 /* The RUID of the last jump we encountered in reload_combine. */
691 static int last_jump_ruid;
692
693 /* The register numbers of the first and last index register. A value of
694 -1 in LAST_INDEX_REG indicates that we've previously computed these
695 values and found no suitable index registers. */
696 static int first_index_reg = -1;
697 static int last_index_reg;
698
699 #define LABEL_LIVE(LABEL) \
700 (label_live[CODE_LABEL_NUMBER (LABEL) - min_labelno])
701
702 /* Subroutine of reload_combine_split_ruids, called to fix up a single
703 ruid pointed to by *PRUID if it is higher than SPLIT_RUID. */
704
705 static inline void
reload_combine_split_one_ruid(int * pruid,int split_ruid)706 reload_combine_split_one_ruid (int *pruid, int split_ruid)
707 {
708 if (*pruid > split_ruid)
709 (*pruid)++;
710 }
711
712 /* Called when we insert a new insn in a position we've already passed in
713 the scan. Examine all our state, increasing all ruids that are higher
714 than SPLIT_RUID by one in order to make room for a new insn. */
715
716 static void
reload_combine_split_ruids(int split_ruid)717 reload_combine_split_ruids (int split_ruid)
718 {
719 unsigned i;
720
721 reload_combine_split_one_ruid (&reload_combine_ruid, split_ruid);
722 reload_combine_split_one_ruid (&last_label_ruid, split_ruid);
723 reload_combine_split_one_ruid (&last_jump_ruid, split_ruid);
724
725 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
726 {
727 int j, idx = reg_state[i].use_index;
728 reload_combine_split_one_ruid (®_state[i].use_ruid, split_ruid);
729 reload_combine_split_one_ruid (®_state[i].store_ruid, split_ruid);
730 reload_combine_split_one_ruid (®_state[i].real_store_ruid,
731 split_ruid);
732 if (idx < 0)
733 continue;
734 for (j = idx; j < RELOAD_COMBINE_MAX_USES; j++)
735 {
736 reload_combine_split_one_ruid (®_state[i].reg_use[j].ruid,
737 split_ruid);
738 }
739 }
740 }
741
742 /* Called when we are about to rescan a previously encountered insn with
743 reload_combine_note_use after modifying some part of it. This clears all
744 information about uses in that particular insn. */
745
746 static void
reload_combine_purge_insn_uses(rtx_insn * insn)747 reload_combine_purge_insn_uses (rtx_insn *insn)
748 {
749 unsigned i;
750
751 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
752 {
753 int j, k, idx = reg_state[i].use_index;
754 if (idx < 0)
755 continue;
756 j = k = RELOAD_COMBINE_MAX_USES;
757 while (j-- > idx)
758 {
759 if (reg_state[i].reg_use[j].insn != insn)
760 {
761 k--;
762 if (k != j)
763 reg_state[i].reg_use[k] = reg_state[i].reg_use[j];
764 }
765 }
766 reg_state[i].use_index = k;
767 }
768 }
769
770 /* Called when we need to forget about all uses of REGNO after an insn
771 which is identified by RUID. */
772
773 static void
reload_combine_purge_reg_uses_after_ruid(unsigned regno,int ruid)774 reload_combine_purge_reg_uses_after_ruid (unsigned regno, int ruid)
775 {
776 int j, k, idx = reg_state[regno].use_index;
777 if (idx < 0)
778 return;
779 j = k = RELOAD_COMBINE_MAX_USES;
780 while (j-- > idx)
781 {
782 if (reg_state[regno].reg_use[j].ruid >= ruid)
783 {
784 k--;
785 if (k != j)
786 reg_state[regno].reg_use[k] = reg_state[regno].reg_use[j];
787 }
788 }
789 reg_state[regno].use_index = k;
790 }
791
792 /* Find the use of REGNO with the ruid that is highest among those
793 lower than RUID_LIMIT, and return it if it is the only use of this
794 reg in the insn. Return NULL otherwise. */
795
796 static struct reg_use *
reload_combine_closest_single_use(unsigned regno,int ruid_limit)797 reload_combine_closest_single_use (unsigned regno, int ruid_limit)
798 {
799 int i, best_ruid = 0;
800 int use_idx = reg_state[regno].use_index;
801 struct reg_use *retval;
802
803 if (use_idx < 0)
804 return NULL;
805 retval = NULL;
806 for (i = use_idx; i < RELOAD_COMBINE_MAX_USES; i++)
807 {
808 struct reg_use *use = reg_state[regno].reg_use + i;
809 int this_ruid = use->ruid;
810 if (this_ruid >= ruid_limit)
811 continue;
812 if (this_ruid > best_ruid)
813 {
814 best_ruid = this_ruid;
815 retval = use;
816 }
817 else if (this_ruid == best_ruid)
818 retval = NULL;
819 }
820 if (last_label_ruid >= best_ruid)
821 return NULL;
822 return retval;
823 }
824
825 /* After we've moved an add insn, fix up any debug insns that occur
826 between the old location of the add and the new location. REG is
827 the destination register of the add insn; REPLACEMENT is the
828 SET_SRC of the add. FROM and TO specify the range in which we
829 should make this change on debug insns. */
830
831 static void
fixup_debug_insns(rtx reg,rtx replacement,rtx_insn * from,rtx_insn * to)832 fixup_debug_insns (rtx reg, rtx replacement, rtx_insn *from, rtx_insn *to)
833 {
834 rtx_insn *insn;
835 for (insn = from; insn != to; insn = NEXT_INSN (insn))
836 {
837 rtx t;
838
839 if (!DEBUG_BIND_INSN_P (insn))
840 continue;
841
842 t = INSN_VAR_LOCATION_LOC (insn);
843 t = simplify_replace_rtx (t, reg, replacement);
844 validate_change (insn, &INSN_VAR_LOCATION_LOC (insn), t, 0);
845 }
846 }
847
848 /* Subroutine of reload_combine_recognize_const_pattern. Try to replace REG
849 with SRC in the insn described by USE, taking costs into account. Return
850 true if we made the replacement. */
851
852 static bool
try_replace_in_use(struct reg_use * use,rtx reg,rtx src)853 try_replace_in_use (struct reg_use *use, rtx reg, rtx src)
854 {
855 rtx_insn *use_insn = use->insn;
856 rtx mem = use->containing_mem;
857 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn));
858
859 if (mem != NULL_RTX)
860 {
861 addr_space_t as = MEM_ADDR_SPACE (mem);
862 rtx oldaddr = XEXP (mem, 0);
863 rtx newaddr = NULL_RTX;
864 int old_cost = address_cost (oldaddr, GET_MODE (mem), as, speed);
865 int new_cost;
866
867 newaddr = simplify_replace_rtx (oldaddr, reg, src);
868 if (memory_address_addr_space_p (GET_MODE (mem), newaddr, as))
869 {
870 XEXP (mem, 0) = newaddr;
871 new_cost = address_cost (newaddr, GET_MODE (mem), as, speed);
872 XEXP (mem, 0) = oldaddr;
873 if (new_cost <= old_cost
874 && validate_change (use_insn,
875 &XEXP (mem, 0), newaddr, 0))
876 return true;
877 }
878 }
879 else
880 {
881 rtx new_set = single_set (use_insn);
882 if (new_set
883 && REG_P (SET_DEST (new_set))
884 && GET_CODE (SET_SRC (new_set)) == PLUS
885 && REG_P (XEXP (SET_SRC (new_set), 0))
886 && CONSTANT_P (XEXP (SET_SRC (new_set), 1)))
887 {
888 rtx new_src;
889 machine_mode mode = GET_MODE (SET_DEST (new_set));
890 int old_cost = set_src_cost (SET_SRC (new_set), mode, speed);
891
892 gcc_assert (rtx_equal_p (XEXP (SET_SRC (new_set), 0), reg));
893 new_src = simplify_replace_rtx (SET_SRC (new_set), reg, src);
894
895 if (set_src_cost (new_src, mode, speed) <= old_cost
896 && validate_change (use_insn, &SET_SRC (new_set),
897 new_src, 0))
898 return true;
899 }
900 }
901 return false;
902 }
903
904 /* Called by reload_combine when scanning INSN. This function tries to detect
905 patterns where a constant is added to a register, and the result is used
906 in an address.
907 Return true if no further processing is needed on INSN; false if it wasn't
908 recognized and should be handled normally. */
909
910 static bool
reload_combine_recognize_const_pattern(rtx_insn * insn)911 reload_combine_recognize_const_pattern (rtx_insn *insn)
912 {
913 int from_ruid = reload_combine_ruid;
914 rtx set, pat, reg, src, addreg;
915 unsigned int regno;
916 struct reg_use *use;
917 bool must_move_add;
918 rtx_insn *add_moved_after_insn = NULL;
919 int add_moved_after_ruid = 0;
920 int clobbered_regno = -1;
921
922 set = single_set (insn);
923 if (set == NULL_RTX)
924 return false;
925
926 reg = SET_DEST (set);
927 src = SET_SRC (set);
928 if (!REG_P (reg)
929 || REG_NREGS (reg) != 1
930 || GET_MODE (reg) != Pmode
931 || reg == stack_pointer_rtx)
932 return false;
933
934 regno = REGNO (reg);
935
936 /* We look for a REG1 = REG2 + CONSTANT insn, followed by either
937 uses of REG1 inside an address, or inside another add insn. If
938 possible and profitable, merge the addition into subsequent
939 uses. */
940 if (GET_CODE (src) != PLUS
941 || !REG_P (XEXP (src, 0))
942 || !CONSTANT_P (XEXP (src, 1)))
943 return false;
944
945 addreg = XEXP (src, 0);
946 must_move_add = rtx_equal_p (reg, addreg);
947
948 pat = PATTERN (insn);
949 if (must_move_add && set != pat)
950 {
951 /* We have to be careful when moving the add; apart from the
952 single_set there may also be clobbers. Recognize one special
953 case, that of one clobber alongside the set (likely a clobber
954 of the CC register). */
955 gcc_assert (GET_CODE (PATTERN (insn)) == PARALLEL);
956 if (XVECLEN (pat, 0) != 2 || XVECEXP (pat, 0, 0) != set
957 || GET_CODE (XVECEXP (pat, 0, 1)) != CLOBBER
958 || !REG_P (XEXP (XVECEXP (pat, 0, 1), 0)))
959 return false;
960 clobbered_regno = REGNO (XEXP (XVECEXP (pat, 0, 1), 0));
961 }
962
963 do
964 {
965 use = reload_combine_closest_single_use (regno, from_ruid);
966
967 if (use)
968 /* Start the search for the next use from here. */
969 from_ruid = use->ruid;
970
971 if (use && GET_MODE (*use->usep) == Pmode)
972 {
973 bool delete_add = false;
974 rtx_insn *use_insn = use->insn;
975 int use_ruid = use->ruid;
976
977 /* Avoid moving the add insn past a jump. */
978 if (must_move_add && use_ruid <= last_jump_ruid)
979 break;
980
981 /* If the add clobbers another hard reg in parallel, don't move
982 it past a real set of this hard reg. */
983 if (must_move_add && clobbered_regno >= 0
984 && reg_state[clobbered_regno].real_store_ruid >= use_ruid)
985 break;
986
987 /* Do not separate cc0 setter and cc0 user on HAVE_cc0 targets. */
988 if (HAVE_cc0 && must_move_add && sets_cc0_p (PATTERN (use_insn)))
989 break;
990
991 gcc_assert (reg_state[regno].store_ruid <= use_ruid);
992 /* Avoid moving a use of ADDREG past a point where it is stored. */
993 if (reg_state[REGNO (addreg)].store_ruid > use_ruid)
994 break;
995
996 /* We also must not move the addition past an insn that sets
997 the same register, unless we can combine two add insns. */
998 if (must_move_add && reg_state[regno].store_ruid == use_ruid)
999 {
1000 if (use->containing_mem == NULL_RTX)
1001 delete_add = true;
1002 else
1003 break;
1004 }
1005
1006 if (try_replace_in_use (use, reg, src))
1007 {
1008 reload_combine_purge_insn_uses (use_insn);
1009 reload_combine_note_use (&PATTERN (use_insn), use_insn,
1010 use_ruid, NULL_RTX);
1011
1012 if (delete_add)
1013 {
1014 fixup_debug_insns (reg, src, insn, use_insn);
1015 delete_insn (insn);
1016 return true;
1017 }
1018 if (must_move_add)
1019 {
1020 add_moved_after_insn = use_insn;
1021 add_moved_after_ruid = use_ruid;
1022 }
1023 continue;
1024 }
1025 }
1026 /* If we get here, we couldn't handle this use. */
1027 if (must_move_add)
1028 break;
1029 }
1030 while (use);
1031
1032 if (!must_move_add || add_moved_after_insn == NULL_RTX)
1033 /* Process the add normally. */
1034 return false;
1035
1036 fixup_debug_insns (reg, src, insn, add_moved_after_insn);
1037
1038 reorder_insns (insn, insn, add_moved_after_insn);
1039 reload_combine_purge_reg_uses_after_ruid (regno, add_moved_after_ruid);
1040 reload_combine_split_ruids (add_moved_after_ruid - 1);
1041 reload_combine_note_use (&PATTERN (insn), insn,
1042 add_moved_after_ruid, NULL_RTX);
1043 reg_state[regno].store_ruid = add_moved_after_ruid;
1044
1045 return true;
1046 }
1047
1048 /* Called by reload_combine when scanning INSN. Try to detect a pattern we
1049 can handle and improve. Return true if no further processing is needed on
1050 INSN; false if it wasn't recognized and should be handled normally. */
1051
1052 static bool
reload_combine_recognize_pattern(rtx_insn * insn)1053 reload_combine_recognize_pattern (rtx_insn *insn)
1054 {
1055 rtx set, reg, src;
1056
1057 set = single_set (insn);
1058 if (set == NULL_RTX)
1059 return false;
1060
1061 reg = SET_DEST (set);
1062 src = SET_SRC (set);
1063 if (!REG_P (reg) || REG_NREGS (reg) != 1)
1064 return false;
1065
1066 unsigned int regno = REGNO (reg);
1067 machine_mode mode = GET_MODE (reg);
1068
1069 if (reg_state[regno].use_index < 0
1070 || reg_state[regno].use_index >= RELOAD_COMBINE_MAX_USES)
1071 return false;
1072
1073 for (int i = reg_state[regno].use_index;
1074 i < RELOAD_COMBINE_MAX_USES; i++)
1075 {
1076 struct reg_use *use = reg_state[regno].reg_use + i;
1077 if (GET_MODE (*use->usep) != mode)
1078 return false;
1079 /* Don't try to adjust (use (REGX)). */
1080 if (GET_CODE (PATTERN (use->insn)) == USE
1081 && &XEXP (PATTERN (use->insn), 0) == use->usep)
1082 return false;
1083 }
1084
1085 /* Look for (set (REGX) (CONST_INT))
1086 (set (REGX) (PLUS (REGX) (REGY)))
1087 ...
1088 ... (MEM (REGX)) ...
1089 and convert it to
1090 (set (REGZ) (CONST_INT))
1091 ...
1092 ... (MEM (PLUS (REGZ) (REGY)))... .
1093
1094 First, check that we have (set (REGX) (PLUS (REGX) (REGY)))
1095 and that we know all uses of REGX before it dies.
1096 Also, explicitly check that REGX != REGY; our life information
1097 does not yet show whether REGY changes in this insn. */
1098
1099 if (GET_CODE (src) == PLUS
1100 && reg_state[regno].all_offsets_match
1101 && last_index_reg != -1
1102 && REG_P (XEXP (src, 1))
1103 && rtx_equal_p (XEXP (src, 0), reg)
1104 && !rtx_equal_p (XEXP (src, 1), reg)
1105 && last_label_ruid < reg_state[regno].use_ruid)
1106 {
1107 rtx base = XEXP (src, 1);
1108 rtx_insn *prev = prev_nonnote_nondebug_insn (insn);
1109 rtx prev_set = prev ? single_set (prev) : NULL_RTX;
1110 rtx index_reg = NULL_RTX;
1111 rtx reg_sum = NULL_RTX;
1112 int i;
1113
1114 /* Now we need to set INDEX_REG to an index register (denoted as
1115 REGZ in the illustration above) and REG_SUM to the expression
1116 register+register that we want to use to substitute uses of REG
1117 (typically in MEMs) with. First check REG and BASE for being
1118 index registers; we can use them even if they are not dead. */
1119 if (TEST_HARD_REG_BIT (reg_class_contents[INDEX_REG_CLASS], regno)
1120 || TEST_HARD_REG_BIT (reg_class_contents[INDEX_REG_CLASS],
1121 REGNO (base)))
1122 {
1123 index_reg = reg;
1124 reg_sum = src;
1125 }
1126 else
1127 {
1128 /* Otherwise, look for a free index register. Since we have
1129 checked above that neither REG nor BASE are index registers,
1130 if we find anything at all, it will be different from these
1131 two registers. */
1132 for (i = first_index_reg; i <= last_index_reg; i++)
1133 {
1134 if (TEST_HARD_REG_BIT (reg_class_contents[INDEX_REG_CLASS], i)
1135 && reg_state[i].use_index == RELOAD_COMBINE_MAX_USES
1136 && reg_state[i].store_ruid <= reg_state[regno].use_ruid
1137 && (call_used_regs[i] || df_regs_ever_live_p (i))
1138 && (!frame_pointer_needed || i != HARD_FRAME_POINTER_REGNUM)
1139 && !fixed_regs[i] && !global_regs[i]
1140 && hard_regno_nregs (i, GET_MODE (reg)) == 1
1141 && targetm.hard_regno_scratch_ok (i))
1142 {
1143 index_reg = gen_rtx_REG (GET_MODE (reg), i);
1144 reg_sum = gen_rtx_PLUS (GET_MODE (reg), index_reg, base);
1145 break;
1146 }
1147 }
1148 }
1149
1150 /* Check that PREV_SET is indeed (set (REGX) (CONST_INT)) and that
1151 (REGY), i.e. BASE, is not clobbered before the last use we'll
1152 create. */
1153 if (reg_sum
1154 && prev_set
1155 && CONST_INT_P (SET_SRC (prev_set))
1156 && rtx_equal_p (SET_DEST (prev_set), reg)
1157 && (reg_state[REGNO (base)].store_ruid
1158 <= reg_state[regno].use_ruid))
1159 {
1160 /* Change destination register and, if necessary, the constant
1161 value in PREV, the constant loading instruction. */
1162 validate_change (prev, &SET_DEST (prev_set), index_reg, 1);
1163 if (reg_state[regno].offset != const0_rtx)
1164 {
1165 HOST_WIDE_INT c
1166 = trunc_int_for_mode (UINTVAL (SET_SRC (prev_set))
1167 + UINTVAL (reg_state[regno].offset),
1168 GET_MODE (index_reg));
1169 validate_change (prev, &SET_SRC (prev_set), GEN_INT (c), 1);
1170 }
1171
1172 /* Now for every use of REG that we have recorded, replace REG
1173 with REG_SUM. */
1174 for (i = reg_state[regno].use_index;
1175 i < RELOAD_COMBINE_MAX_USES; i++)
1176 validate_unshare_change (reg_state[regno].reg_use[i].insn,
1177 reg_state[regno].reg_use[i].usep,
1178 /* Each change must have its own
1179 replacement. */
1180 reg_sum, 1);
1181
1182 if (apply_change_group ())
1183 {
1184 struct reg_use *lowest_ruid = NULL;
1185
1186 /* For every new use of REG_SUM, we have to record the use
1187 of BASE therein, i.e. operand 1. */
1188 for (i = reg_state[regno].use_index;
1189 i < RELOAD_COMBINE_MAX_USES; i++)
1190 {
1191 struct reg_use *use = reg_state[regno].reg_use + i;
1192 reload_combine_note_use (&XEXP (*use->usep, 1), use->insn,
1193 use->ruid, use->containing_mem);
1194 if (lowest_ruid == NULL || use->ruid < lowest_ruid->ruid)
1195 lowest_ruid = use;
1196 }
1197
1198 fixup_debug_insns (reg, reg_sum, insn, lowest_ruid->insn);
1199
1200 /* Delete the reg-reg addition. */
1201 delete_insn (insn);
1202
1203 if (reg_state[regno].offset != const0_rtx
1204 /* Previous REG_EQUIV / REG_EQUAL notes for PREV
1205 are now invalid. */
1206 && remove_reg_equal_equiv_notes (prev))
1207 df_notes_rescan (prev);
1208
1209 reg_state[regno].use_index = RELOAD_COMBINE_MAX_USES;
1210 return true;
1211 }
1212 }
1213 }
1214 return false;
1215 }
1216
1217 static void
reload_combine(void)1218 reload_combine (void)
1219 {
1220 rtx_insn *insn, *prev;
1221 basic_block bb;
1222 unsigned int r;
1223 int min_labelno, n_labels;
1224 HARD_REG_SET ever_live_at_start, *label_live;
1225
1226 /* To avoid wasting too much time later searching for an index register,
1227 determine the minimum and maximum index register numbers. */
1228 if (INDEX_REG_CLASS == NO_REGS)
1229 last_index_reg = -1;
1230 else if (first_index_reg == -1 && last_index_reg == 0)
1231 {
1232 for (r = 0; r < FIRST_PSEUDO_REGISTER; r++)
1233 if (TEST_HARD_REG_BIT (reg_class_contents[INDEX_REG_CLASS], r))
1234 {
1235 if (first_index_reg == -1)
1236 first_index_reg = r;
1237
1238 last_index_reg = r;
1239 }
1240
1241 /* If no index register is available, we can quit now. Set LAST_INDEX_REG
1242 to -1 so we'll know to quit early the next time we get here. */
1243 if (first_index_reg == -1)
1244 {
1245 last_index_reg = -1;
1246 return;
1247 }
1248 }
1249
1250 /* Set up LABEL_LIVE and EVER_LIVE_AT_START. The register lifetime
1251 information is a bit fuzzy immediately after reload, but it's
1252 still good enough to determine which registers are live at a jump
1253 destination. */
1254 min_labelno = get_first_label_num ();
1255 n_labels = max_label_num () - min_labelno;
1256 label_live = XNEWVEC (HARD_REG_SET, n_labels);
1257 CLEAR_HARD_REG_SET (ever_live_at_start);
1258
1259 FOR_EACH_BB_REVERSE_FN (bb, cfun)
1260 {
1261 insn = BB_HEAD (bb);
1262 if (LABEL_P (insn))
1263 {
1264 HARD_REG_SET live;
1265 bitmap live_in = df_get_live_in (bb);
1266
1267 REG_SET_TO_HARD_REG_SET (live, live_in);
1268 compute_use_by_pseudos (&live, live_in);
1269 COPY_HARD_REG_SET (LABEL_LIVE (insn), live);
1270 IOR_HARD_REG_SET (ever_live_at_start, live);
1271 }
1272 }
1273
1274 /* Initialize last_label_ruid, reload_combine_ruid and reg_state. */
1275 last_label_ruid = last_jump_ruid = reload_combine_ruid = 0;
1276 for (r = 0; r < FIRST_PSEUDO_REGISTER; r++)
1277 {
1278 reg_state[r].store_ruid = 0;
1279 reg_state[r].real_store_ruid = 0;
1280 if (fixed_regs[r])
1281 reg_state[r].use_index = -1;
1282 else
1283 reg_state[r].use_index = RELOAD_COMBINE_MAX_USES;
1284 }
1285
1286 for (insn = get_last_insn (); insn; insn = prev)
1287 {
1288 bool control_flow_insn;
1289 rtx note;
1290
1291 prev = PREV_INSN (insn);
1292
1293 /* We cannot do our optimization across labels. Invalidating all the use
1294 information we have would be costly, so we just note where the label
1295 is and then later disable any optimization that would cross it. */
1296 if (LABEL_P (insn))
1297 last_label_ruid = reload_combine_ruid;
1298 else if (BARRIER_P (insn))
1299 {
1300 /* Crossing a barrier resets all the use information. */
1301 for (r = 0; r < FIRST_PSEUDO_REGISTER; r++)
1302 if (! fixed_regs[r])
1303 reg_state[r].use_index = RELOAD_COMBINE_MAX_USES;
1304 }
1305 else if (INSN_P (insn) && volatile_insn_p (PATTERN (insn)))
1306 /* Optimizations across insns being marked as volatile must be
1307 prevented. All the usage information is invalidated
1308 here. */
1309 for (r = 0; r < FIRST_PSEUDO_REGISTER; r++)
1310 if (! fixed_regs[r]
1311 && reg_state[r].use_index != RELOAD_COMBINE_MAX_USES)
1312 reg_state[r].use_index = -1;
1313
1314 if (! NONDEBUG_INSN_P (insn))
1315 continue;
1316
1317 reload_combine_ruid++;
1318
1319 control_flow_insn = control_flow_insn_p (insn);
1320 if (control_flow_insn)
1321 last_jump_ruid = reload_combine_ruid;
1322
1323 if (reload_combine_recognize_const_pattern (insn)
1324 || reload_combine_recognize_pattern (insn))
1325 continue;
1326
1327 note_stores (PATTERN (insn), reload_combine_note_store, NULL);
1328
1329 if (CALL_P (insn))
1330 {
1331 rtx link;
1332 HARD_REG_SET used_regs;
1333
1334 get_call_reg_set_usage (insn, &used_regs, call_used_reg_set);
1335
1336 for (r = 0; r < FIRST_PSEUDO_REGISTER; r++)
1337 if (TEST_HARD_REG_BIT (used_regs, r))
1338 {
1339 reg_state[r].use_index = RELOAD_COMBINE_MAX_USES;
1340 reg_state[r].store_ruid = reload_combine_ruid;
1341 }
1342
1343 for (link = CALL_INSN_FUNCTION_USAGE (insn); link;
1344 link = XEXP (link, 1))
1345 {
1346 rtx setuse = XEXP (link, 0);
1347 rtx usage_rtx = XEXP (setuse, 0);
1348 if ((GET_CODE (setuse) == USE || GET_CODE (setuse) == CLOBBER)
1349 && REG_P (usage_rtx))
1350 {
1351 unsigned int end_regno = END_REGNO (usage_rtx);
1352 for (unsigned int i = REGNO (usage_rtx); i < end_regno; ++i)
1353 if (GET_CODE (XEXP (link, 0)) == CLOBBER)
1354 {
1355 reg_state[i].use_index = RELOAD_COMBINE_MAX_USES;
1356 reg_state[i].store_ruid = reload_combine_ruid;
1357 }
1358 else
1359 reg_state[i].use_index = -1;
1360 }
1361 }
1362 }
1363
1364 if (control_flow_insn && !ANY_RETURN_P (PATTERN (insn)))
1365 {
1366 /* Non-spill registers might be used at the call destination in
1367 some unknown fashion, so we have to mark the unknown use. */
1368 HARD_REG_SET *live;
1369
1370 if ((condjump_p (insn) || condjump_in_parallel_p (insn))
1371 && JUMP_LABEL (insn))
1372 {
1373 if (ANY_RETURN_P (JUMP_LABEL (insn)))
1374 live = NULL;
1375 else
1376 live = &LABEL_LIVE (JUMP_LABEL (insn));
1377 }
1378 else
1379 live = &ever_live_at_start;
1380
1381 if (live)
1382 for (r = 0; r < FIRST_PSEUDO_REGISTER; r++)
1383 if (TEST_HARD_REG_BIT (*live, r))
1384 reg_state[r].use_index = -1;
1385 }
1386
1387 reload_combine_note_use (&PATTERN (insn), insn, reload_combine_ruid,
1388 NULL_RTX);
1389
1390 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
1391 {
1392 if (REG_NOTE_KIND (note) == REG_INC && REG_P (XEXP (note, 0)))
1393 {
1394 int regno = REGNO (XEXP (note, 0));
1395 reg_state[regno].store_ruid = reload_combine_ruid;
1396 reg_state[regno].real_store_ruid = reload_combine_ruid;
1397 reg_state[regno].use_index = -1;
1398 }
1399 }
1400 }
1401
1402 free (label_live);
1403 }
1404
1405 /* Check if DST is a register or a subreg of a register; if it is,
1406 update store_ruid, real_store_ruid and use_index in the reg_state
1407 structure accordingly. Called via note_stores from reload_combine. */
1408
1409 static void
reload_combine_note_store(rtx dst,const_rtx set,void * data ATTRIBUTE_UNUSED)1410 reload_combine_note_store (rtx dst, const_rtx set, void *data ATTRIBUTE_UNUSED)
1411 {
1412 int regno = 0;
1413 int i;
1414 machine_mode mode = GET_MODE (dst);
1415
1416 if (GET_CODE (dst) == SUBREG)
1417 {
1418 regno = subreg_regno_offset (REGNO (SUBREG_REG (dst)),
1419 GET_MODE (SUBREG_REG (dst)),
1420 SUBREG_BYTE (dst),
1421 GET_MODE (dst));
1422 dst = SUBREG_REG (dst);
1423 }
1424
1425 /* Some targets do argument pushes without adding REG_INC notes. */
1426
1427 if (MEM_P (dst))
1428 {
1429 dst = XEXP (dst, 0);
1430 if (GET_CODE (dst) == PRE_INC || GET_CODE (dst) == POST_INC
1431 || GET_CODE (dst) == PRE_DEC || GET_CODE (dst) == POST_DEC
1432 || GET_CODE (dst) == PRE_MODIFY || GET_CODE (dst) == POST_MODIFY)
1433 {
1434 unsigned int end_regno = END_REGNO (XEXP (dst, 0));
1435 for (unsigned int i = REGNO (XEXP (dst, 0)); i < end_regno; ++i)
1436 {
1437 /* We could probably do better, but for now mark the register
1438 as used in an unknown fashion and set/clobbered at this
1439 insn. */
1440 reg_state[i].use_index = -1;
1441 reg_state[i].store_ruid = reload_combine_ruid;
1442 reg_state[i].real_store_ruid = reload_combine_ruid;
1443 }
1444 }
1445 else
1446 return;
1447 }
1448
1449 if (!REG_P (dst))
1450 return;
1451 regno += REGNO (dst);
1452
1453 /* note_stores might have stripped a STRICT_LOW_PART, so we have to be
1454 careful with registers / register parts that are not full words.
1455 Similarly for ZERO_EXTRACT. */
1456 if (GET_CODE (SET_DEST (set)) == ZERO_EXTRACT
1457 || GET_CODE (SET_DEST (set)) == STRICT_LOW_PART)
1458 {
1459 for (i = end_hard_regno (mode, regno) - 1; i >= regno; i--)
1460 {
1461 reg_state[i].use_index = -1;
1462 reg_state[i].store_ruid = reload_combine_ruid;
1463 reg_state[i].real_store_ruid = reload_combine_ruid;
1464 }
1465 }
1466 else
1467 {
1468 for (i = end_hard_regno (mode, regno) - 1; i >= regno; i--)
1469 {
1470 reg_state[i].store_ruid = reload_combine_ruid;
1471 if (GET_CODE (set) == SET)
1472 reg_state[i].real_store_ruid = reload_combine_ruid;
1473 reg_state[i].use_index = RELOAD_COMBINE_MAX_USES;
1474 }
1475 }
1476 }
1477
1478 /* XP points to a piece of rtl that has to be checked for any uses of
1479 registers.
1480 *XP is the pattern of INSN, or a part of it.
1481 Called from reload_combine, and recursively by itself. */
1482 static void
reload_combine_note_use(rtx * xp,rtx_insn * insn,int ruid,rtx containing_mem)1483 reload_combine_note_use (rtx *xp, rtx_insn *insn, int ruid, rtx containing_mem)
1484 {
1485 rtx x = *xp;
1486 enum rtx_code code = x->code;
1487 const char *fmt;
1488 int i, j;
1489 rtx offset = const0_rtx; /* For the REG case below. */
1490
1491 switch (code)
1492 {
1493 case SET:
1494 if (REG_P (SET_DEST (x)))
1495 {
1496 reload_combine_note_use (&SET_SRC (x), insn, ruid, NULL_RTX);
1497 return;
1498 }
1499 break;
1500
1501 case USE:
1502 /* If this is the USE of a return value, we can't change it. */
1503 if (REG_P (XEXP (x, 0)) && REG_FUNCTION_VALUE_P (XEXP (x, 0)))
1504 {
1505 /* Mark the return register as used in an unknown fashion. */
1506 rtx reg = XEXP (x, 0);
1507 unsigned int end_regno = END_REGNO (reg);
1508 for (unsigned int regno = REGNO (reg); regno < end_regno; ++regno)
1509 reg_state[regno].use_index = -1;
1510 return;
1511 }
1512 break;
1513
1514 case CLOBBER:
1515 if (REG_P (SET_DEST (x)))
1516 {
1517 /* No spurious CLOBBERs of pseudo registers may remain. */
1518 gcc_assert (REGNO (SET_DEST (x)) < FIRST_PSEUDO_REGISTER);
1519 return;
1520 }
1521 break;
1522
1523 case PLUS:
1524 /* We are interested in (plus (reg) (const_int)) . */
1525 if (!REG_P (XEXP (x, 0))
1526 || !CONST_INT_P (XEXP (x, 1)))
1527 break;
1528 offset = XEXP (x, 1);
1529 x = XEXP (x, 0);
1530 /* Fall through. */
1531 case REG:
1532 {
1533 int regno = REGNO (x);
1534 int use_index;
1535 int nregs;
1536
1537 /* No spurious USEs of pseudo registers may remain. */
1538 gcc_assert (regno < FIRST_PSEUDO_REGISTER);
1539
1540 nregs = REG_NREGS (x);
1541
1542 /* We can't substitute into multi-hard-reg uses. */
1543 if (nregs > 1)
1544 {
1545 while (--nregs >= 0)
1546 reg_state[regno + nregs].use_index = -1;
1547 return;
1548 }
1549
1550 /* We may be called to update uses in previously seen insns.
1551 Don't add uses beyond the last store we saw. */
1552 if (ruid < reg_state[regno].store_ruid)
1553 return;
1554
1555 /* If this register is already used in some unknown fashion, we
1556 can't do anything.
1557 If we decrement the index from zero to -1, we can't store more
1558 uses, so this register becomes used in an unknown fashion. */
1559 use_index = --reg_state[regno].use_index;
1560 if (use_index < 0)
1561 return;
1562
1563 if (use_index == RELOAD_COMBINE_MAX_USES - 1)
1564 {
1565 /* This is the first use of this register we have seen since we
1566 marked it as dead. */
1567 reg_state[regno].offset = offset;
1568 reg_state[regno].all_offsets_match = true;
1569 reg_state[regno].use_ruid = ruid;
1570 }
1571 else
1572 {
1573 if (reg_state[regno].use_ruid > ruid)
1574 reg_state[regno].use_ruid = ruid;
1575
1576 if (! rtx_equal_p (offset, reg_state[regno].offset))
1577 reg_state[regno].all_offsets_match = false;
1578 }
1579
1580 reg_state[regno].reg_use[use_index].insn = insn;
1581 reg_state[regno].reg_use[use_index].ruid = ruid;
1582 reg_state[regno].reg_use[use_index].containing_mem = containing_mem;
1583 reg_state[regno].reg_use[use_index].usep = xp;
1584 return;
1585 }
1586
1587 case MEM:
1588 containing_mem = x;
1589 break;
1590
1591 default:
1592 break;
1593 }
1594
1595 /* Recursively process the components of X. */
1596 fmt = GET_RTX_FORMAT (code);
1597 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1598 {
1599 if (fmt[i] == 'e')
1600 reload_combine_note_use (&XEXP (x, i), insn, ruid, containing_mem);
1601 else if (fmt[i] == 'E')
1602 {
1603 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
1604 reload_combine_note_use (&XVECEXP (x, i, j), insn, ruid,
1605 containing_mem);
1606 }
1607 }
1608 }
1609
1610 /* See if we can reduce the cost of a constant by replacing a move
1611 with an add. We track situations in which a register is set to a
1612 constant or to a register plus a constant. */
1613 /* We cannot do our optimization across labels. Invalidating all the
1614 information about register contents we have would be costly, so we
1615 use move2add_last_label_luid to note where the label is and then
1616 later disable any optimization that would cross it.
1617 reg_offset[n] / reg_base_reg[n] / reg_symbol_ref[n] / reg_mode[n]
1618 are only valid if reg_set_luid[n] is greater than
1619 move2add_last_label_luid.
1620 For a set that established a new (potential) base register with
1621 non-constant value, we use move2add_luid from the place where the
1622 setting insn is encountered; registers based off that base then
1623 get the same reg_set_luid. Constants all get
1624 move2add_last_label_luid + 1 as their reg_set_luid. */
1625 static int reg_set_luid[FIRST_PSEUDO_REGISTER];
1626
1627 /* If reg_base_reg[n] is negative, register n has been set to
1628 reg_offset[n] or reg_symbol_ref[n] + reg_offset[n] in mode reg_mode[n].
1629 If reg_base_reg[n] is non-negative, register n has been set to the
1630 sum of reg_offset[n] and the value of register reg_base_reg[n]
1631 before reg_set_luid[n], calculated in mode reg_mode[n] .
1632 For multi-hard-register registers, all but the first one are
1633 recorded as BLKmode in reg_mode. Setting reg_mode to VOIDmode
1634 marks it as invalid. */
1635 static HOST_WIDE_INT reg_offset[FIRST_PSEUDO_REGISTER];
1636 static int reg_base_reg[FIRST_PSEUDO_REGISTER];
1637 static rtx reg_symbol_ref[FIRST_PSEUDO_REGISTER];
1638 static machine_mode reg_mode[FIRST_PSEUDO_REGISTER];
1639
1640 /* move2add_luid is linearly increased while scanning the instructions
1641 from first to last. It is used to set reg_set_luid in
1642 reload_cse_move2add and move2add_note_store. */
1643 static int move2add_luid;
1644
1645 /* move2add_last_label_luid is set whenever a label is found. Labels
1646 invalidate all previously collected reg_offset data. */
1647 static int move2add_last_label_luid;
1648
1649 /* ??? We don't know how zero / sign extension is handled, hence we
1650 can't go from a narrower to a wider mode. */
1651 #define MODES_OK_FOR_MOVE2ADD(OUTMODE, INMODE) \
1652 (GET_MODE_SIZE (OUTMODE) == GET_MODE_SIZE (INMODE) \
1653 || (GET_MODE_SIZE (OUTMODE) <= GET_MODE_SIZE (INMODE) \
1654 && TRULY_NOOP_TRUNCATION_MODES_P (OUTMODE, INMODE)))
1655
1656 /* Record that REG is being set to a value with the mode of REG. */
1657
1658 static void
move2add_record_mode(rtx reg)1659 move2add_record_mode (rtx reg)
1660 {
1661 int regno, nregs;
1662 machine_mode mode = GET_MODE (reg);
1663
1664 if (GET_CODE (reg) == SUBREG)
1665 {
1666 regno = subreg_regno (reg);
1667 nregs = subreg_nregs (reg);
1668 }
1669 else if (REG_P (reg))
1670 {
1671 regno = REGNO (reg);
1672 nregs = REG_NREGS (reg);
1673 }
1674 else
1675 gcc_unreachable ();
1676 for (int i = nregs - 1; i > 0; i--)
1677 reg_mode[regno + i] = BLKmode;
1678 reg_mode[regno] = mode;
1679 }
1680
1681 /* Record that REG is being set to the sum of SYM and OFF. */
1682
1683 static void
move2add_record_sym_value(rtx reg,rtx sym,rtx off)1684 move2add_record_sym_value (rtx reg, rtx sym, rtx off)
1685 {
1686 int regno = REGNO (reg);
1687
1688 move2add_record_mode (reg);
1689 reg_set_luid[regno] = move2add_luid;
1690 reg_base_reg[regno] = -1;
1691 reg_symbol_ref[regno] = sym;
1692 reg_offset[regno] = INTVAL (off);
1693 }
1694
1695 /* Check if REGNO contains a valid value in MODE. */
1696
1697 static bool
move2add_valid_value_p(int regno,scalar_int_mode mode)1698 move2add_valid_value_p (int regno, scalar_int_mode mode)
1699 {
1700 if (reg_set_luid[regno] <= move2add_last_label_luid)
1701 return false;
1702
1703 if (mode != reg_mode[regno])
1704 {
1705 scalar_int_mode old_mode;
1706 if (!is_a <scalar_int_mode> (reg_mode[regno], &old_mode)
1707 || !MODES_OK_FOR_MOVE2ADD (mode, old_mode)
1708 || !REG_CAN_CHANGE_MODE_P (regno, old_mode, mode))
1709 return false;
1710 /* The value loaded into regno in reg_mode[regno] is also valid in
1711 mode after truncation only if (REG:mode regno) is the lowpart of
1712 (REG:reg_mode[regno] regno). Now, for big endian, the starting
1713 regno of the lowpart might be different. */
1714 poly_int64 s_off = subreg_lowpart_offset (mode, old_mode);
1715 s_off = subreg_regno_offset (regno, old_mode, s_off, mode);
1716 if (maybe_ne (s_off, 0))
1717 /* We could in principle adjust regno, check reg_mode[regno] to be
1718 BLKmode, and return s_off to the caller (vs. -1 for failure),
1719 but we currently have no callers that could make use of this
1720 information. */
1721 return false;
1722 }
1723
1724 for (int i = end_hard_regno (mode, regno) - 1; i > regno; i--)
1725 if (reg_mode[i] != BLKmode)
1726 return false;
1727 return true;
1728 }
1729
1730 /* This function is called with INSN that sets REG (of mode MODE)
1731 to (SYM + OFF), while REG is known to already have value (SYM + offset).
1732 This function tries to change INSN into an add instruction
1733 (set (REG) (plus (REG) (OFF - offset))) using the known value.
1734 It also updates the information about REG's known value.
1735 Return true if we made a change. */
1736
1737 static bool
move2add_use_add2_insn(scalar_int_mode mode,rtx reg,rtx sym,rtx off,rtx_insn * insn)1738 move2add_use_add2_insn (scalar_int_mode mode, rtx reg, rtx sym, rtx off,
1739 rtx_insn *insn)
1740 {
1741 rtx pat = PATTERN (insn);
1742 rtx src = SET_SRC (pat);
1743 int regno = REGNO (reg);
1744 rtx new_src = gen_int_mode (UINTVAL (off) - reg_offset[regno], mode);
1745 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
1746 bool changed = false;
1747
1748 /* (set (reg) (plus (reg) (const_int 0))) is not canonical;
1749 use (set (reg) (reg)) instead.
1750 We don't delete this insn, nor do we convert it into a
1751 note, to avoid losing register notes or the return
1752 value flag. jump2 already knows how to get rid of
1753 no-op moves. */
1754 if (new_src == const0_rtx)
1755 {
1756 /* If the constants are different, this is a
1757 truncation, that, if turned into (set (reg)
1758 (reg)), would be discarded. Maybe we should
1759 try a truncMN pattern? */
1760 if (INTVAL (off) == reg_offset [regno])
1761 changed = validate_change (insn, &SET_SRC (pat), reg, 0);
1762 }
1763 else
1764 {
1765 struct full_rtx_costs oldcst, newcst;
1766 rtx tem = gen_rtx_PLUS (mode, reg, new_src);
1767
1768 get_full_set_rtx_cost (pat, &oldcst);
1769 SET_SRC (pat) = tem;
1770 get_full_set_rtx_cost (pat, &newcst);
1771 SET_SRC (pat) = src;
1772
1773 if (costs_lt_p (&newcst, &oldcst, speed)
1774 && have_add2_insn (reg, new_src))
1775 changed = validate_change (insn, &SET_SRC (pat), tem, 0);
1776 else if (sym == NULL_RTX && mode != BImode)
1777 {
1778 scalar_int_mode narrow_mode;
1779 FOR_EACH_MODE_UNTIL (narrow_mode, mode)
1780 {
1781 if (have_insn_for (STRICT_LOW_PART, narrow_mode)
1782 && ((reg_offset[regno] & ~GET_MODE_MASK (narrow_mode))
1783 == (INTVAL (off) & ~GET_MODE_MASK (narrow_mode))))
1784 {
1785 rtx narrow_reg = gen_lowpart_common (narrow_mode, reg);
1786 rtx narrow_src = gen_int_mode (INTVAL (off),
1787 narrow_mode);
1788 rtx new_set
1789 = gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode,
1790 narrow_reg),
1791 narrow_src);
1792 get_full_set_rtx_cost (new_set, &newcst);
1793 if (costs_lt_p (&newcst, &oldcst, speed))
1794 {
1795 changed = validate_change (insn, &PATTERN (insn),
1796 new_set, 0);
1797 if (changed)
1798 break;
1799 }
1800 }
1801 }
1802 }
1803 }
1804 move2add_record_sym_value (reg, sym, off);
1805 return changed;
1806 }
1807
1808
1809 /* This function is called with INSN that sets REG (of mode MODE) to
1810 (SYM + OFF), but REG doesn't have known value (SYM + offset). This
1811 function tries to find another register which is known to already have
1812 value (SYM + offset) and change INSN into an add instruction
1813 (set (REG) (plus (the found register) (OFF - offset))) if such
1814 a register is found. It also updates the information about
1815 REG's known value.
1816 Return true iff we made a change. */
1817
1818 static bool
move2add_use_add3_insn(scalar_int_mode mode,rtx reg,rtx sym,rtx off,rtx_insn * insn)1819 move2add_use_add3_insn (scalar_int_mode mode, rtx reg, rtx sym, rtx off,
1820 rtx_insn *insn)
1821 {
1822 rtx pat = PATTERN (insn);
1823 rtx src = SET_SRC (pat);
1824 int regno = REGNO (reg);
1825 int min_regno = 0;
1826 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
1827 int i;
1828 bool changed = false;
1829 struct full_rtx_costs oldcst, newcst, mincst;
1830 rtx plus_expr;
1831
1832 init_costs_to_max (&mincst);
1833 get_full_set_rtx_cost (pat, &oldcst);
1834
1835 plus_expr = gen_rtx_PLUS (GET_MODE (reg), reg, const0_rtx);
1836 SET_SRC (pat) = plus_expr;
1837
1838 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
1839 if (move2add_valid_value_p (i, mode)
1840 && reg_base_reg[i] < 0
1841 && reg_symbol_ref[i] != NULL_RTX
1842 && rtx_equal_p (sym, reg_symbol_ref[i]))
1843 {
1844 rtx new_src = gen_int_mode (UINTVAL (off) - reg_offset[i],
1845 GET_MODE (reg));
1846 /* (set (reg) (plus (reg) (const_int 0))) is not canonical;
1847 use (set (reg) (reg)) instead.
1848 We don't delete this insn, nor do we convert it into a
1849 note, to avoid losing register notes or the return
1850 value flag. jump2 already knows how to get rid of
1851 no-op moves. */
1852 if (new_src == const0_rtx)
1853 {
1854 init_costs_to_zero (&mincst);
1855 min_regno = i;
1856 break;
1857 }
1858 else
1859 {
1860 XEXP (plus_expr, 1) = new_src;
1861 get_full_set_rtx_cost (pat, &newcst);
1862
1863 if (costs_lt_p (&newcst, &mincst, speed))
1864 {
1865 mincst = newcst;
1866 min_regno = i;
1867 }
1868 }
1869 }
1870 SET_SRC (pat) = src;
1871
1872 if (costs_lt_p (&mincst, &oldcst, speed))
1873 {
1874 rtx tem;
1875
1876 tem = gen_rtx_REG (GET_MODE (reg), min_regno);
1877 if (i != min_regno)
1878 {
1879 rtx new_src = gen_int_mode (UINTVAL (off) - reg_offset[min_regno],
1880 GET_MODE (reg));
1881 tem = gen_rtx_PLUS (GET_MODE (reg), tem, new_src);
1882 }
1883 if (validate_change (insn, &SET_SRC (pat), tem, 0))
1884 changed = true;
1885 }
1886 reg_set_luid[regno] = move2add_luid;
1887 move2add_record_sym_value (reg, sym, off);
1888 return changed;
1889 }
1890
1891 /* Convert move insns with constant inputs to additions if they are cheaper.
1892 Return true if any changes were made. */
1893 static bool
reload_cse_move2add(rtx_insn * first)1894 reload_cse_move2add (rtx_insn *first)
1895 {
1896 int i;
1897 rtx_insn *insn;
1898 bool changed = false;
1899
1900 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
1901 {
1902 reg_set_luid[i] = 0;
1903 reg_offset[i] = 0;
1904 reg_base_reg[i] = 0;
1905 reg_symbol_ref[i] = NULL_RTX;
1906 reg_mode[i] = VOIDmode;
1907 }
1908
1909 move2add_last_label_luid = 0;
1910 move2add_luid = 2;
1911 for (insn = first; insn; insn = NEXT_INSN (insn), move2add_luid++)
1912 {
1913 rtx pat, note;
1914
1915 if (LABEL_P (insn))
1916 {
1917 move2add_last_label_luid = move2add_luid;
1918 /* We're going to increment move2add_luid twice after a
1919 label, so that we can use move2add_last_label_luid + 1 as
1920 the luid for constants. */
1921 move2add_luid++;
1922 continue;
1923 }
1924 if (! INSN_P (insn))
1925 continue;
1926 pat = PATTERN (insn);
1927 /* For simplicity, we only perform this optimization on
1928 straightforward SETs. */
1929 scalar_int_mode mode;
1930 if (GET_CODE (pat) == SET
1931 && REG_P (SET_DEST (pat))
1932 && is_a <scalar_int_mode> (GET_MODE (SET_DEST (pat)), &mode))
1933 {
1934 rtx reg = SET_DEST (pat);
1935 int regno = REGNO (reg);
1936 rtx src = SET_SRC (pat);
1937
1938 /* Check if we have valid information on the contents of this
1939 register in the mode of REG. */
1940 if (move2add_valid_value_p (regno, mode)
1941 && dbg_cnt (cse2_move2add))
1942 {
1943 /* Try to transform (set (REGX) (CONST_INT A))
1944 ...
1945 (set (REGX) (CONST_INT B))
1946 to
1947 (set (REGX) (CONST_INT A))
1948 ...
1949 (set (REGX) (plus (REGX) (CONST_INT B-A)))
1950 or
1951 (set (REGX) (CONST_INT A))
1952 ...
1953 (set (STRICT_LOW_PART (REGX)) (CONST_INT B))
1954 */
1955
1956 if (CONST_INT_P (src)
1957 && reg_base_reg[regno] < 0
1958 && reg_symbol_ref[regno] == NULL_RTX)
1959 {
1960 changed |= move2add_use_add2_insn (mode, reg, NULL_RTX,
1961 src, insn);
1962 continue;
1963 }
1964
1965 /* Try to transform (set (REGX) (REGY))
1966 (set (REGX) (PLUS (REGX) (CONST_INT A)))
1967 ...
1968 (set (REGX) (REGY))
1969 (set (REGX) (PLUS (REGX) (CONST_INT B)))
1970 to
1971 (set (REGX) (REGY))
1972 (set (REGX) (PLUS (REGX) (CONST_INT A)))
1973 ...
1974 (set (REGX) (plus (REGX) (CONST_INT B-A))) */
1975 else if (REG_P (src)
1976 && reg_set_luid[regno] == reg_set_luid[REGNO (src)]
1977 && reg_base_reg[regno] == reg_base_reg[REGNO (src)]
1978 && move2add_valid_value_p (REGNO (src), mode))
1979 {
1980 rtx_insn *next = next_nonnote_nondebug_insn (insn);
1981 rtx set = NULL_RTX;
1982 if (next)
1983 set = single_set (next);
1984 if (set
1985 && SET_DEST (set) == reg
1986 && GET_CODE (SET_SRC (set)) == PLUS
1987 && XEXP (SET_SRC (set), 0) == reg
1988 && CONST_INT_P (XEXP (SET_SRC (set), 1)))
1989 {
1990 rtx src3 = XEXP (SET_SRC (set), 1);
1991 unsigned HOST_WIDE_INT added_offset = UINTVAL (src3);
1992 HOST_WIDE_INT base_offset = reg_offset[REGNO (src)];
1993 HOST_WIDE_INT regno_offset = reg_offset[regno];
1994 rtx new_src =
1995 gen_int_mode (added_offset
1996 + base_offset
1997 - regno_offset,
1998 mode);
1999 bool success = false;
2000 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
2001
2002 if (new_src == const0_rtx)
2003 /* See above why we create (set (reg) (reg)) here. */
2004 success
2005 = validate_change (next, &SET_SRC (set), reg, 0);
2006 else
2007 {
2008 rtx old_src = SET_SRC (set);
2009 struct full_rtx_costs oldcst, newcst;
2010 rtx tem = gen_rtx_PLUS (mode, reg, new_src);
2011
2012 get_full_set_rtx_cost (set, &oldcst);
2013 SET_SRC (set) = tem;
2014 get_full_set_src_cost (tem, mode, &newcst);
2015 SET_SRC (set) = old_src;
2016 costs_add_n_insns (&oldcst, 1);
2017
2018 if (costs_lt_p (&newcst, &oldcst, speed)
2019 && have_add2_insn (reg, new_src))
2020 {
2021 rtx newpat = gen_rtx_SET (reg, tem);
2022 success
2023 = validate_change (next, &PATTERN (next),
2024 newpat, 0);
2025 }
2026 }
2027 if (success)
2028 delete_insn (insn);
2029 changed |= success;
2030 insn = next;
2031 move2add_record_mode (reg);
2032 reg_offset[regno]
2033 = trunc_int_for_mode (added_offset + base_offset,
2034 mode);
2035 continue;
2036 }
2037 }
2038 }
2039
2040 /* Try to transform
2041 (set (REGX) (CONST (PLUS (SYMBOL_REF) (CONST_INT A))))
2042 ...
2043 (set (REGY) (CONST (PLUS (SYMBOL_REF) (CONST_INT B))))
2044 to
2045 (set (REGX) (CONST (PLUS (SYMBOL_REF) (CONST_INT A))))
2046 ...
2047 (set (REGY) (CONST (PLUS (REGX) (CONST_INT B-A)))) */
2048 if ((GET_CODE (src) == SYMBOL_REF
2049 || (GET_CODE (src) == CONST
2050 && GET_CODE (XEXP (src, 0)) == PLUS
2051 && GET_CODE (XEXP (XEXP (src, 0), 0)) == SYMBOL_REF
2052 && CONST_INT_P (XEXP (XEXP (src, 0), 1))))
2053 && dbg_cnt (cse2_move2add))
2054 {
2055 rtx sym, off;
2056
2057 if (GET_CODE (src) == SYMBOL_REF)
2058 {
2059 sym = src;
2060 off = const0_rtx;
2061 }
2062 else
2063 {
2064 sym = XEXP (XEXP (src, 0), 0);
2065 off = XEXP (XEXP (src, 0), 1);
2066 }
2067
2068 /* If the reg already contains the value which is sum of
2069 sym and some constant value, we can use an add2 insn. */
2070 if (move2add_valid_value_p (regno, mode)
2071 && reg_base_reg[regno] < 0
2072 && reg_symbol_ref[regno] != NULL_RTX
2073 && rtx_equal_p (sym, reg_symbol_ref[regno]))
2074 changed |= move2add_use_add2_insn (mode, reg, sym, off, insn);
2075
2076 /* Otherwise, we have to find a register whose value is sum
2077 of sym and some constant value. */
2078 else
2079 changed |= move2add_use_add3_insn (mode, reg, sym, off, insn);
2080
2081 continue;
2082 }
2083 }
2084
2085 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
2086 {
2087 if (REG_NOTE_KIND (note) == REG_INC
2088 && REG_P (XEXP (note, 0)))
2089 {
2090 /* Reset the information about this register. */
2091 int regno = REGNO (XEXP (note, 0));
2092 if (regno < FIRST_PSEUDO_REGISTER)
2093 {
2094 move2add_record_mode (XEXP (note, 0));
2095 reg_mode[regno] = VOIDmode;
2096 }
2097 }
2098 }
2099 note_stores (PATTERN (insn), move2add_note_store, insn);
2100
2101 /* If INSN is a conditional branch, we try to extract an
2102 implicit set out of it. */
2103 if (any_condjump_p (insn))
2104 {
2105 rtx cnd = fis_get_condition (insn);
2106
2107 if (cnd != NULL_RTX
2108 && GET_CODE (cnd) == NE
2109 && REG_P (XEXP (cnd, 0))
2110 && !reg_set_p (XEXP (cnd, 0), insn)
2111 /* The following two checks, which are also in
2112 move2add_note_store, are intended to reduce the
2113 number of calls to gen_rtx_SET to avoid memory
2114 allocation if possible. */
2115 && SCALAR_INT_MODE_P (GET_MODE (XEXP (cnd, 0)))
2116 && REG_NREGS (XEXP (cnd, 0)) == 1
2117 && CONST_INT_P (XEXP (cnd, 1)))
2118 {
2119 rtx implicit_set =
2120 gen_rtx_SET (XEXP (cnd, 0), XEXP (cnd, 1));
2121 move2add_note_store (SET_DEST (implicit_set), implicit_set, insn);
2122 }
2123 }
2124
2125 /* If this is a CALL_INSN, all call used registers are stored with
2126 unknown values. */
2127 if (CALL_P (insn))
2128 {
2129 rtx link;
2130
2131 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
2132 {
2133 if (call_used_regs[i])
2134 /* Reset the information about this register. */
2135 reg_mode[i] = VOIDmode;
2136 }
2137
2138 for (link = CALL_INSN_FUNCTION_USAGE (insn); link;
2139 link = XEXP (link, 1))
2140 {
2141 rtx setuse = XEXP (link, 0);
2142 rtx usage_rtx = XEXP (setuse, 0);
2143 if (GET_CODE (setuse) == CLOBBER
2144 && REG_P (usage_rtx))
2145 {
2146 unsigned int end_regno = END_REGNO (usage_rtx);
2147 for (unsigned int r = REGNO (usage_rtx); r < end_regno; ++r)
2148 /* Reset the information about this register. */
2149 reg_mode[r] = VOIDmode;
2150 }
2151 }
2152 }
2153 }
2154 return changed;
2155 }
2156
2157 /* SET is a SET or CLOBBER that sets DST. DATA is the insn which
2158 contains SET.
2159 Update reg_set_luid, reg_offset and reg_base_reg accordingly.
2160 Called from reload_cse_move2add via note_stores. */
2161
2162 static void
move2add_note_store(rtx dst,const_rtx set,void * data)2163 move2add_note_store (rtx dst, const_rtx set, void *data)
2164 {
2165 rtx_insn *insn = (rtx_insn *) data;
2166 unsigned int regno = 0;
2167 scalar_int_mode mode;
2168
2169 /* Some targets do argument pushes without adding REG_INC notes. */
2170
2171 if (MEM_P (dst))
2172 {
2173 dst = XEXP (dst, 0);
2174 if (GET_CODE (dst) == PRE_INC || GET_CODE (dst) == POST_INC
2175 || GET_CODE (dst) == PRE_DEC || GET_CODE (dst) == POST_DEC)
2176 reg_mode[REGNO (XEXP (dst, 0))] = VOIDmode;
2177 return;
2178 }
2179
2180 if (GET_CODE (dst) == SUBREG)
2181 regno = subreg_regno (dst);
2182 else if (REG_P (dst))
2183 regno = REGNO (dst);
2184 else
2185 return;
2186
2187 if (!is_a <scalar_int_mode> (GET_MODE (dst), &mode))
2188 goto invalidate;
2189
2190 if (GET_CODE (set) == SET)
2191 {
2192 rtx note, sym = NULL_RTX;
2193 rtx off;
2194
2195 note = find_reg_equal_equiv_note (insn);
2196 if (note && GET_CODE (XEXP (note, 0)) == SYMBOL_REF)
2197 {
2198 sym = XEXP (note, 0);
2199 off = const0_rtx;
2200 }
2201 else if (note && GET_CODE (XEXP (note, 0)) == CONST
2202 && GET_CODE (XEXP (XEXP (note, 0), 0)) == PLUS
2203 && GET_CODE (XEXP (XEXP (XEXP (note, 0), 0), 0)) == SYMBOL_REF
2204 && CONST_INT_P (XEXP (XEXP (XEXP (note, 0), 0), 1)))
2205 {
2206 sym = XEXP (XEXP (XEXP (note, 0), 0), 0);
2207 off = XEXP (XEXP (XEXP (note, 0), 0), 1);
2208 }
2209
2210 if (sym != NULL_RTX)
2211 {
2212 move2add_record_sym_value (dst, sym, off);
2213 return;
2214 }
2215 }
2216
2217 if (GET_CODE (set) == SET
2218 && GET_CODE (SET_DEST (set)) != ZERO_EXTRACT
2219 && GET_CODE (SET_DEST (set)) != STRICT_LOW_PART)
2220 {
2221 rtx src = SET_SRC (set);
2222 rtx base_reg;
2223 unsigned HOST_WIDE_INT offset;
2224 int base_regno;
2225
2226 switch (GET_CODE (src))
2227 {
2228 case PLUS:
2229 if (REG_P (XEXP (src, 0)))
2230 {
2231 base_reg = XEXP (src, 0);
2232
2233 if (CONST_INT_P (XEXP (src, 1)))
2234 offset = UINTVAL (XEXP (src, 1));
2235 else if (REG_P (XEXP (src, 1))
2236 && move2add_valid_value_p (REGNO (XEXP (src, 1)), mode))
2237 {
2238 if (reg_base_reg[REGNO (XEXP (src, 1))] < 0
2239 && reg_symbol_ref[REGNO (XEXP (src, 1))] == NULL_RTX)
2240 offset = reg_offset[REGNO (XEXP (src, 1))];
2241 /* Maybe the first register is known to be a
2242 constant. */
2243 else if (move2add_valid_value_p (REGNO (base_reg), mode)
2244 && reg_base_reg[REGNO (base_reg)] < 0
2245 && reg_symbol_ref[REGNO (base_reg)] == NULL_RTX)
2246 {
2247 offset = reg_offset[REGNO (base_reg)];
2248 base_reg = XEXP (src, 1);
2249 }
2250 else
2251 goto invalidate;
2252 }
2253 else
2254 goto invalidate;
2255
2256 break;
2257 }
2258
2259 goto invalidate;
2260
2261 case REG:
2262 base_reg = src;
2263 offset = 0;
2264 break;
2265
2266 case CONST_INT:
2267 /* Start tracking the register as a constant. */
2268 reg_base_reg[regno] = -1;
2269 reg_symbol_ref[regno] = NULL_RTX;
2270 reg_offset[regno] = INTVAL (SET_SRC (set));
2271 /* We assign the same luid to all registers set to constants. */
2272 reg_set_luid[regno] = move2add_last_label_luid + 1;
2273 move2add_record_mode (dst);
2274 return;
2275
2276 default:
2277 goto invalidate;
2278 }
2279
2280 base_regno = REGNO (base_reg);
2281 /* If information about the base register is not valid, set it
2282 up as a new base register, pretending its value is known
2283 starting from the current insn. */
2284 if (!move2add_valid_value_p (base_regno, mode))
2285 {
2286 reg_base_reg[base_regno] = base_regno;
2287 reg_symbol_ref[base_regno] = NULL_RTX;
2288 reg_offset[base_regno] = 0;
2289 reg_set_luid[base_regno] = move2add_luid;
2290 gcc_assert (GET_MODE (base_reg) == mode);
2291 move2add_record_mode (base_reg);
2292 }
2293
2294 /* Copy base information from our base register. */
2295 reg_set_luid[regno] = reg_set_luid[base_regno];
2296 reg_base_reg[regno] = reg_base_reg[base_regno];
2297 reg_symbol_ref[regno] = reg_symbol_ref[base_regno];
2298
2299 /* Compute the sum of the offsets or constants. */
2300 reg_offset[regno]
2301 = trunc_int_for_mode (offset + reg_offset[base_regno], mode);
2302
2303 move2add_record_mode (dst);
2304 }
2305 else
2306 {
2307 invalidate:
2308 /* Invalidate the contents of the register. */
2309 move2add_record_mode (dst);
2310 reg_mode[regno] = VOIDmode;
2311 }
2312 }
2313
2314 namespace {
2315
2316 const pass_data pass_data_postreload_cse =
2317 {
2318 RTL_PASS, /* type */
2319 "postreload", /* name */
2320 OPTGROUP_NONE, /* optinfo_flags */
2321 TV_RELOAD_CSE_REGS, /* tv_id */
2322 0, /* properties_required */
2323 0, /* properties_provided */
2324 0, /* properties_destroyed */
2325 0, /* todo_flags_start */
2326 TODO_df_finish, /* todo_flags_finish */
2327 };
2328
2329 class pass_postreload_cse : public rtl_opt_pass
2330 {
2331 public:
pass_postreload_cse(gcc::context * ctxt)2332 pass_postreload_cse (gcc::context *ctxt)
2333 : rtl_opt_pass (pass_data_postreload_cse, ctxt)
2334 {}
2335
2336 /* opt_pass methods: */
gate(function *)2337 virtual bool gate (function *) { return (optimize > 0 && reload_completed); }
2338
2339 virtual unsigned int execute (function *);
2340
2341 }; // class pass_postreload_cse
2342
2343 unsigned int
execute(function * fun)2344 pass_postreload_cse::execute (function *fun)
2345 {
2346 if (!dbg_cnt (postreload_cse))
2347 return 0;
2348
2349 /* Do a very simple CSE pass over just the hard registers. */
2350 reload_cse_regs (get_insns ());
2351 /* Reload_cse_regs can eliminate potentially-trapping MEMs.
2352 Remove any EH edges associated with them. */
2353 if (fun->can_throw_non_call_exceptions
2354 && purge_all_dead_edges ())
2355 cleanup_cfg (0);
2356
2357 return 0;
2358 }
2359
2360 } // anon namespace
2361
2362 rtl_opt_pass *
make_pass_postreload_cse(gcc::context * ctxt)2363 make_pass_postreload_cse (gcc::context *ctxt)
2364 {
2365 return new pass_postreload_cse (ctxt);
2366 }
2367