1 /* RTL-based forward propagation pass for GNU compiler.
2 Copyright (C) 2005-2021 Free Software Foundation, Inc.
3 Contributed by Paolo Bonzini and Steven Bosscher.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #define INCLUDE_ALGORITHM
22 #define INCLUDE_FUNCTIONAL
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "rtl.h"
28 #include "df.h"
29 #include "rtl-ssa.h"
30
31 #include "predict.h"
32 #include "cfgrtl.h"
33 #include "cfgcleanup.h"
34 #include "cfgloop.h"
35 #include "tree-pass.h"
36 #include "rtl-iter.h"
37 #include "target.h"
38
39 /* This pass does simple forward propagation and simplification when an
40 operand of an insn can only come from a single def. This pass uses
41 RTL SSA, so it is global. However, we only do limited analysis of
42 available expressions.
43
44 1) The pass tries to propagate the source of the def into the use,
45 and checks if the result is independent of the substituted value.
46 For example, the high word of a (zero_extend:DI (reg:SI M)) is always
47 zero, independent of the source register.
48
49 In particular, we propagate constants into the use site. Sometimes
50 RTL expansion did not put the constant in the same insn on purpose,
51 to satisfy a predicate, and the result will fail to be recognized;
52 but this happens rarely and in this case we can still create a
53 REG_EQUAL note. For multi-word operations, this
54
55 (set (subreg:SI (reg:DI 120) 0) (const_int 0))
56 (set (subreg:SI (reg:DI 120) 4) (const_int -1))
57 (set (subreg:SI (reg:DI 122) 0)
58 (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0)))
59 (set (subreg:SI (reg:DI 122) 4)
60 (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4)))
61
62 can be simplified to the much simpler
63
64 (set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119)))
65 (set (subreg:SI (reg:DI 122) 4) (const_int -1))
66
67 This particular propagation is also effective at putting together
68 complex addressing modes. We are more aggressive inside MEMs, in
69 that all definitions are propagated if the use is in a MEM; if the
70 result is a valid memory address we check address_cost to decide
71 whether the substitution is worthwhile.
72
73 2) The pass propagates register copies. This is not as effective as
74 the copy propagation done by CSE's canon_reg, which works by walking
75 the instruction chain, it can help the other transformations.
76
77 We should consider removing this optimization, and instead reorder the
78 RTL passes, because GCSE does this transformation too. With some luck,
79 the CSE pass at the end of rest_of_handle_gcse could also go away.
80
81 3) The pass looks for paradoxical subregs that are actually unnecessary.
82 Things like this:
83
84 (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
85 (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
86 (set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0)
87 (subreg:SI (reg:QI 121) 0)))
88
89 are very common on machines that can only do word-sized operations.
90 For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0),
91 if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0),
92 we can replace the paradoxical subreg with simply (reg:WIDE M). The
93 above will simplify this to
94
95 (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
96 (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
97 (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119)))
98
99 where the first two insns are now dead. */
100
101 using namespace rtl_ssa;
102
103 static int num_changes;
104
105 /* Do not try to replace constant addresses or addresses of local and
106 argument slots. These MEM expressions are made only once and inserted
107 in many instructions, as well as being used to control symbol table
108 output. It is not safe to clobber them.
109
110 There are some uncommon cases where the address is already in a register
111 for some reason, but we cannot take advantage of that because we have
112 no easy way to unshare the MEM. In addition, looking up all stack
113 addresses is costly. */
114
115 static bool
can_simplify_addr(rtx addr)116 can_simplify_addr (rtx addr)
117 {
118 rtx reg;
119
120 if (CONSTANT_ADDRESS_P (addr))
121 return false;
122
123 if (GET_CODE (addr) == PLUS)
124 reg = XEXP (addr, 0);
125 else
126 reg = addr;
127
128 return (!REG_P (reg)
129 || (REGNO (reg) != FRAME_POINTER_REGNUM
130 && REGNO (reg) != HARD_FRAME_POINTER_REGNUM
131 && REGNO (reg) != ARG_POINTER_REGNUM));
132 }
133
134 /* MEM is the result of an address simplification, and temporarily
135 undoing changes OLD_NUM_CHANGES onwards restores the original address.
136 Return whether it is good to use the new address instead of the
137 old one. INSN is the containing instruction. */
138
139 static bool
should_replace_address(int old_num_changes,rtx mem,rtx_insn * insn)140 should_replace_address (int old_num_changes, rtx mem, rtx_insn *insn)
141 {
142 int gain;
143
144 /* Prefer the new address if it is less expensive. */
145 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
146 temporarily_undo_changes (old_num_changes);
147 gain = address_cost (XEXP (mem, 0), GET_MODE (mem),
148 MEM_ADDR_SPACE (mem), speed);
149 redo_changes (old_num_changes);
150 gain -= address_cost (XEXP (mem, 0), GET_MODE (mem),
151 MEM_ADDR_SPACE (mem), speed);
152
153 /* If the addresses have equivalent cost, prefer the new address
154 if it has the highest `set_src_cost'. That has the potential of
155 eliminating the most insns without additional costs, and it
156 is the same that cse.c used to do. */
157 if (gain == 0)
158 {
159 gain = set_src_cost (XEXP (mem, 0), VOIDmode, speed);
160 temporarily_undo_changes (old_num_changes);
161 gain -= set_src_cost (XEXP (mem, 0), VOIDmode, speed);
162 redo_changes (old_num_changes);
163 }
164
165 return (gain > 0);
166 }
167
168
169 namespace
170 {
171 class fwprop_propagation : public insn_propagation
172 {
173 public:
174 static const uint16_t CHANGED_MEM = FIRST_SPARE_RESULT;
175 static const uint16_t CONSTANT = FIRST_SPARE_RESULT << 1;
176 static const uint16_t PROFITABLE = FIRST_SPARE_RESULT << 2;
177
178 fwprop_propagation (insn_info *, set_info *, rtx, rtx);
179
changed_mem_p()180 bool changed_mem_p () const { return result_flags & CHANGED_MEM; }
181 bool folded_to_constants_p () const;
182 bool profitable_p () const;
183
184 bool check_mem (int, rtx) final override;
185 void note_simplification (int, uint16_t, rtx, rtx) final override;
186 uint16_t classify_result (rtx, rtx);
187
188 private:
189 const bool single_use_p;
190 const bool single_ebb_p;
191 };
192 }
193
194 /* Prepare to replace FROM with TO in USE_INSN. */
195
fwprop_propagation(insn_info * use_insn,set_info * def,rtx from,rtx to)196 fwprop_propagation::fwprop_propagation (insn_info *use_insn,
197 set_info *def, rtx from, rtx to)
198 : insn_propagation (use_insn->rtl (), from, to),
199 single_use_p (def->single_nondebug_use ()),
200 single_ebb_p (use_insn->ebb () == def->ebb ())
201 {
202 should_check_mems = true;
203 should_note_simplifications = true;
204 }
205
206 /* MEM is the result of an address simplification, and temporarily
207 undoing changes OLD_NUM_CHANGES onwards restores the original address.
208 Return true if the propagation should continue, false if it has failed. */
209
210 bool
check_mem(int old_num_changes,rtx mem)211 fwprop_propagation::check_mem (int old_num_changes, rtx mem)
212 {
213 if (!memory_address_addr_space_p (GET_MODE (mem), XEXP (mem, 0),
214 MEM_ADDR_SPACE (mem)))
215 {
216 failure_reason = "would create an invalid MEM";
217 return false;
218 }
219
220 temporarily_undo_changes (old_num_changes);
221 bool can_simplify = can_simplify_addr (XEXP (mem, 0));
222 redo_changes (old_num_changes);
223 if (!can_simplify)
224 {
225 failure_reason = "would replace a frame address";
226 return false;
227 }
228
229 /* Copy propagations are always ok. Otherwise check the costs. */
230 if (!(REG_P (from) && REG_P (to))
231 && !should_replace_address (old_num_changes, mem, insn))
232 {
233 failure_reason = "would increase the cost of a MEM";
234 return false;
235 }
236
237 result_flags |= CHANGED_MEM;
238 return true;
239 }
240
241 /* OLDX has been simplified to NEWX. Describe the change in terms of
242 result_flags. */
243
244 uint16_t
classify_result(rtx old_rtx,rtx new_rtx)245 fwprop_propagation::classify_result (rtx old_rtx, rtx new_rtx)
246 {
247 if (CONSTANT_P (new_rtx))
248 {
249 /* If OLD_RTX is a LO_SUM, then it presumably exists for a reason,
250 and NEW_RTX is likely not a legitimate address. We want it to
251 disappear if it is invalid.
252
253 ??? Using the mode of the LO_SUM as the mode of the address
254 seems odd, but it was what the pre-SSA code did. */
255 if (GET_CODE (old_rtx) == LO_SUM
256 && !memory_address_p (GET_MODE (old_rtx), new_rtx))
257 return CONSTANT;
258 return CONSTANT | PROFITABLE;
259 }
260
261 /* Allow replacements that simplify operations on a vector or complex
262 value to a component. The most prominent case is
263 (subreg ([vec_]concat ...)). */
264 if (REG_P (new_rtx)
265 && !HARD_REGISTER_P (new_rtx)
266 && (VECTOR_MODE_P (GET_MODE (from))
267 || COMPLEX_MODE_P (GET_MODE (from)))
268 && GET_MODE (new_rtx) == GET_MODE_INNER (GET_MODE (from)))
269 return PROFITABLE;
270
271 /* Allow (subreg (mem)) -> (mem) simplifications with the following
272 exceptions:
273 1) Propagating (mem)s into multiple uses is not profitable.
274 2) Propagating (mem)s across EBBs may not be profitable if the source EBB
275 runs less frequently.
276 3) Propagating (mem)s into paradoxical (subreg)s is not profitable.
277 4) Creating new (mem/v)s is not correct, since DCE will not remove the old
278 ones. */
279 if (single_use_p
280 && single_ebb_p
281 && SUBREG_P (old_rtx)
282 && !paradoxical_subreg_p (old_rtx)
283 && MEM_P (new_rtx)
284 && !MEM_VOLATILE_P (new_rtx))
285 return PROFITABLE;
286
287 return 0;
288 }
289
290 /* Record that OLD_RTX has been simplified to NEW_RTX. OLD_NUM_CHANGES
291 is the number of unrelated changes that had been made before processing
292 OLD_RTX and its subrtxes. OLD_RESULT_FLAGS is the value that result_flags
293 had at that point. */
294
295 void
note_simplification(int old_num_changes,uint16_t old_result_flags,rtx old_rtx,rtx new_rtx)296 fwprop_propagation::note_simplification (int old_num_changes,
297 uint16_t old_result_flags,
298 rtx old_rtx, rtx new_rtx)
299 {
300 result_flags &= ~(CONSTANT | PROFITABLE);
301 uint16_t new_flags = classify_result (old_rtx, new_rtx);
302 if (old_num_changes)
303 new_flags &= old_result_flags;
304 result_flags |= new_flags;
305 }
306
307 /* Return true if all substitutions eventually folded to constants. */
308
309 bool
folded_to_constants_p()310 fwprop_propagation::folded_to_constants_p () const
311 {
312 /* If we're propagating a HIGH, require it to be folded with a
313 partnering LO_SUM. For example, a REG_EQUAL note with a register
314 replaced by an unfolded HIGH is not useful. */
315 if (CONSTANT_P (to) && GET_CODE (to) != HIGH)
316 return true;
317 return !(result_flags & UNSIMPLIFIED) && (result_flags & CONSTANT);
318 }
319
320
321 /* Return true if it is worth keeping the result of the propagation,
322 false if it would increase the complexity of the pattern too much. */
323
324 bool
profitable_p()325 fwprop_propagation::profitable_p () const
326 {
327 if (changed_mem_p ())
328 return true;
329
330 if (!(result_flags & UNSIMPLIFIED)
331 && (result_flags & PROFITABLE))
332 return true;
333
334 if (REG_P (to))
335 return true;
336
337 if (GET_CODE (to) == SUBREG
338 && REG_P (SUBREG_REG (to))
339 && !paradoxical_subreg_p (to))
340 return true;
341
342 if (CONSTANT_P (to))
343 return true;
344
345 return false;
346 }
347
348 /* Check that X has a single def. */
349
350 static bool
reg_single_def_p(rtx x)351 reg_single_def_p (rtx x)
352 {
353 return REG_P (x) && crtl->ssa->single_dominating_def (REGNO (x));
354 }
355
356 /* Return true if X contains a paradoxical subreg. */
357
358 static bool
contains_paradoxical_subreg_p(rtx x)359 contains_paradoxical_subreg_p (rtx x)
360 {
361 subrtx_var_iterator::array_type array;
362 FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST)
363 {
364 x = *iter;
365 if (SUBREG_P (x) && paradoxical_subreg_p (x))
366 return true;
367 }
368 return false;
369 }
370
371 /* Try to substitute (set DEST SRC), which defines DEF, into note NOTE of
372 USE_INSN. Return the number of substitutions on success, otherwise return
373 -1 and leave USE_INSN unchanged.
374
375 If REQUIRE_CONSTANT is true, require all substituted occurrences of SRC
376 to fold to a constant, so that the note does not use any more registers
377 than it did previously. If REQUIRE_CONSTANT is false, also allow the
378 substitution if it's something we'd normally allow for the main
379 instruction pattern. */
380
381 static int
try_fwprop_subst_note(insn_info * use_insn,set_info * def,rtx note,rtx dest,rtx src,bool require_constant)382 try_fwprop_subst_note (insn_info *use_insn, set_info *def,
383 rtx note, rtx dest, rtx src, bool require_constant)
384 {
385 rtx_insn *use_rtl = use_insn->rtl ();
386 insn_info *def_insn = def->insn ();
387
388 insn_change_watermark watermark;
389 fwprop_propagation prop (use_insn, def, dest, src);
390 if (!prop.apply_to_rvalue (&XEXP (note, 0)))
391 {
392 if (dump_file && (dump_flags & TDF_DETAILS))
393 fprintf (dump_file, "cannot propagate from insn %d into"
394 " notes of insn %d: %s\n", def_insn->uid (),
395 use_insn->uid (), prop.failure_reason);
396 return -1;
397 }
398
399 if (prop.num_replacements == 0)
400 return 0;
401
402 if (require_constant)
403 {
404 if (!prop.folded_to_constants_p ())
405 {
406 if (dump_file && (dump_flags & TDF_DETAILS))
407 fprintf (dump_file, "cannot propagate from insn %d into"
408 " notes of insn %d: %s\n", def_insn->uid (),
409 use_insn->uid (), "wouldn't fold to constants");
410 return -1;
411 }
412 }
413 else
414 {
415 if (!prop.folded_to_constants_p () && !prop.profitable_p ())
416 {
417 if (dump_file && (dump_flags & TDF_DETAILS))
418 fprintf (dump_file, "cannot propagate from insn %d into"
419 " notes of insn %d: %s\n", def_insn->uid (),
420 use_insn->uid (), "would increase complexity of node");
421 return -1;
422 }
423 }
424
425 if (dump_file && (dump_flags & TDF_DETAILS))
426 {
427 fprintf (dump_file, "\nin notes of insn %d, replacing:\n ",
428 INSN_UID (use_rtl));
429 temporarily_undo_changes (0);
430 print_inline_rtx (dump_file, note, 2);
431 redo_changes (0);
432 fprintf (dump_file, "\n with:\n ");
433 print_inline_rtx (dump_file, note, 2);
434 fprintf (dump_file, "\n");
435 }
436 watermark.keep ();
437 return prop.num_replacements;
438 }
439
440 /* Try to substitute (set DEST SRC), which defines DEF, into location LOC of
441 USE_INSN's pattern. Return true on success, otherwise leave USE_INSN
442 unchanged. */
443
444 static bool
try_fwprop_subst_pattern(obstack_watermark & attempt,insn_change & use_change,set_info * def,rtx * loc,rtx dest,rtx src)445 try_fwprop_subst_pattern (obstack_watermark &attempt, insn_change &use_change,
446 set_info *def, rtx *loc, rtx dest, rtx src)
447 {
448 insn_info *use_insn = use_change.insn ();
449 rtx_insn *use_rtl = use_insn->rtl ();
450 insn_info *def_insn = def->insn ();
451
452 insn_change_watermark watermark;
453 fwprop_propagation prop (use_insn, def, dest, src);
454 if (!prop.apply_to_pattern (loc))
455 {
456 if (dump_file && (dump_flags & TDF_DETAILS))
457 fprintf (dump_file, "cannot propagate from insn %d into"
458 " insn %d: %s\n", def_insn->uid (), use_insn->uid (),
459 prop.failure_reason);
460 return false;
461 }
462
463 if (prop.num_replacements == 0)
464 return false;
465
466 if (!prop.profitable_p ())
467 {
468 if (dump_file && (dump_flags & TDF_DETAILS))
469 fprintf (dump_file, "cannot propagate from insn %d into"
470 " insn %d: %s\n", def_insn->uid (), use_insn->uid (),
471 "would increase complexity of pattern");
472 return false;
473 }
474
475 if (dump_file && (dump_flags & TDF_DETAILS))
476 {
477 fprintf (dump_file, "\npropagating insn %d into insn %d, replacing:\n",
478 def_insn->uid (), use_insn->uid ());
479 temporarily_undo_changes (0);
480 print_rtl_single (dump_file, PATTERN (use_rtl));
481 redo_changes (0);
482 }
483
484 /* ??? In theory, it should be better to use insn costs rather than
485 set_src_costs here. That would involve replacing this code with
486 change_is_worthwhile. */
487 bool ok = recog (attempt, use_change);
488 if (ok && !prop.changed_mem_p () && !use_insn->is_asm ())
489 if (rtx use_set = single_set (use_rtl))
490 {
491 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_rtl));
492 temporarily_undo_changes (0);
493 auto old_cost = set_src_cost (SET_SRC (use_set),
494 GET_MODE (SET_DEST (use_set)), speed);
495 redo_changes (0);
496 auto new_cost = set_src_cost (SET_SRC (use_set),
497 GET_MODE (SET_DEST (use_set)), speed);
498 if (new_cost > old_cost)
499 {
500 if (dump_file)
501 fprintf (dump_file, "change not profitable"
502 " (cost %d -> cost %d)\n", old_cost, new_cost);
503 ok = false;
504 }
505 }
506
507 if (!ok)
508 {
509 /* The pattern didn't match, but if all uses of SRC folded to
510 constants, we can add a REG_EQUAL note for the result, if there
511 isn't one already. */
512 if (!prop.folded_to_constants_p ())
513 return false;
514
515 /* Test this first to avoid creating an unnecessary copy of SRC. */
516 if (find_reg_note (use_rtl, REG_EQUAL, NULL_RTX))
517 return false;
518
519 rtx set = set_for_reg_notes (use_rtl);
520 if (!set || !REG_P (SET_DEST (set)))
521 return false;
522
523 rtx value = copy_rtx (SET_SRC (set));
524 cancel_changes (0);
525
526 /* If there are any paradoxical SUBREGs, drop the REG_EQUAL note,
527 because the bits in there can be anything and so might not
528 match the REG_EQUAL note content. See PR70574. */
529 if (contains_paradoxical_subreg_p (SET_SRC (set)))
530 return false;
531
532 if (dump_file && (dump_flags & TDF_DETAILS))
533 fprintf (dump_file, " Setting REG_EQUAL note\n");
534
535 return set_unique_reg_note (use_rtl, REG_EQUAL, value);
536 }
537
538 rtx *note_ptr = ®_NOTES (use_rtl);
539 while (rtx note = *note_ptr)
540 {
541 if ((REG_NOTE_KIND (note) == REG_EQUAL
542 || REG_NOTE_KIND (note) == REG_EQUIV)
543 && try_fwprop_subst_note (use_insn, def, note, dest, src, false) < 0)
544 {
545 *note_ptr = XEXP (note, 1);
546 free_EXPR_LIST_node (note);
547 }
548 else
549 note_ptr = &XEXP (note, 1);
550 }
551
552 confirm_change_group ();
553 crtl->ssa->change_insn (use_change);
554 num_changes++;
555 return true;
556 }
557
558 /* Try to substitute (set DEST SRC), which defines DEF, into USE_INSN's notes,
559 given that it was not possible to do this for USE_INSN's main pattern.
560 Return true on success, otherwise leave USE_INSN unchanged. */
561
562 static bool
try_fwprop_subst_notes(insn_info * use_insn,set_info * def,rtx dest,rtx src)563 try_fwprop_subst_notes (insn_info *use_insn, set_info *def,
564 rtx dest, rtx src)
565 {
566 rtx_insn *use_rtl = use_insn->rtl ();
567 for (rtx note = REG_NOTES (use_rtl); note; note = XEXP (note, 1))
568 if ((REG_NOTE_KIND (note) == REG_EQUAL
569 || REG_NOTE_KIND (note) == REG_EQUIV)
570 && try_fwprop_subst_note (use_insn, def, note, dest, src, true) > 0)
571 {
572 confirm_change_group ();
573 return true;
574 }
575
576 return false;
577 }
578
579 /* Check whether we could validly substitute (set DEST SRC), which defines DEF,
580 into USE. If so, first try performing the substitution in location LOC
581 of USE->insn ()'s pattern. If that fails, try instead to substitute
582 into the notes.
583
584 Return true on success, otherwise leave USE_INSN unchanged. */
585
586 static bool
try_fwprop_subst(use_info * use,set_info * def,rtx * loc,rtx dest,rtx src)587 try_fwprop_subst (use_info *use, set_info *def,
588 rtx *loc, rtx dest, rtx src)
589 {
590 insn_info *use_insn = use->insn ();
591 insn_info *def_insn = def->insn ();
592
593 auto attempt = crtl->ssa->new_change_attempt ();
594 use_array src_uses = remove_note_accesses (attempt, def_insn->uses ());
595
596 /* ??? Not really a meaningful test: it means we can propagate arithmetic
597 involving hard registers but not bare references to them. A better
598 test would be to iterate over src_uses looking for hard registers
599 that are not fixed. */
600 if (REG_P (src) && HARD_REGISTER_P (src))
601 return false;
602
603 /* ??? It would be better to make this EBB-based instead. That would
604 involve checking for equal EBBs rather than equal BBs and trying
605 to make the uses available at use_insn->ebb ()->first_bb (). */
606 if (def_insn->bb () != use_insn->bb ())
607 {
608 src_uses = crtl->ssa->make_uses_available (attempt, src_uses,
609 use_insn->bb ());
610 if (!src_uses.is_valid ())
611 return false;
612 }
613
614 insn_change use_change (use_insn);
615 use_change.new_uses = merge_access_arrays (attempt, use_change.new_uses,
616 src_uses);
617 if (!use_change.new_uses.is_valid ())
618 return false;
619
620 /* ??? We could allow movement within the EBB by adding:
621
622 use_change.move_range = use_insn->ebb ()->insn_range (); */
623 if (!restrict_movement (use_change))
624 return false;
625
626 return (try_fwprop_subst_pattern (attempt, use_change, def, loc, dest, src)
627 || try_fwprop_subst_notes (use_insn, def, dest, src));
628 }
629
630 /* For the given single_set INSN, containing SRC known to be a
631 ZERO_EXTEND or SIGN_EXTEND of a register, return true if INSN
632 is redundant due to the register being set by a LOAD_EXTEND_OP
633 load from memory. */
634
635 static bool
free_load_extend(rtx src,insn_info * insn)636 free_load_extend (rtx src, insn_info *insn)
637 {
638 rtx reg = XEXP (src, 0);
639 if (load_extend_op (GET_MODE (reg)) != GET_CODE (src))
640 return false;
641
642 def_info *def = nullptr;
643 for (use_info *use : insn->uses ())
644 if (use->regno () == REGNO (reg))
645 {
646 def = use->def ();
647 break;
648 }
649
650 if (!def)
651 return false;
652
653 insn_info *def_insn = def->insn ();
654 if (def_insn->is_artificial ())
655 return false;
656
657 rtx_insn *def_rtl = def_insn->rtl ();
658 if (NONJUMP_INSN_P (def_rtl))
659 {
660 rtx patt = PATTERN (def_rtl);
661
662 if (GET_CODE (patt) == SET
663 && GET_CODE (SET_SRC (patt)) == MEM
664 && rtx_equal_p (SET_DEST (patt), reg))
665 return true;
666 }
667 return false;
668 }
669
670 /* Subroutine of forward_propagate_subreg that handles a use of DEST
671 in REF. The other parameters are the same. */
672
673 static bool
forward_propagate_subreg(use_info * use,set_info * def,rtx dest,rtx src,df_ref ref)674 forward_propagate_subreg (use_info *use, set_info *def,
675 rtx dest, rtx src, df_ref ref)
676 {
677 scalar_int_mode int_use_mode, src_mode;
678
679 /* Only consider subregs... */
680 rtx use_reg = DF_REF_REG (ref);
681 machine_mode use_mode = GET_MODE (use_reg);
682 if (GET_CODE (use_reg) != SUBREG
683 || GET_MODE (SUBREG_REG (use_reg)) != GET_MODE (dest))
684 return false;
685
686 /* ??? Replacing throughout the pattern would help for match_dups. */
687 rtx *loc = DF_REF_LOC (ref);
688 if (paradoxical_subreg_p (use_reg))
689 {
690 /* If this is a paradoxical SUBREG, we have no idea what value the
691 extra bits would have. However, if the operand is equivalent to
692 a SUBREG whose operand is the same as our mode, and all the modes
693 are within a word, we can just use the inner operand because
694 these SUBREGs just say how to treat the register. */
695 if (GET_CODE (src) == SUBREG
696 && REG_P (SUBREG_REG (src))
697 && REGNO (SUBREG_REG (src)) >= FIRST_PSEUDO_REGISTER
698 && GET_MODE (SUBREG_REG (src)) == use_mode
699 && subreg_lowpart_p (src))
700 return try_fwprop_subst (use, def, loc, use_reg, SUBREG_REG (src));
701 }
702
703 /* If this is a SUBREG of a ZERO_EXTEND or SIGN_EXTEND, and the SUBREG
704 is the low part of the reg being extended then just use the inner
705 operand. Don't do this if the ZERO_EXTEND or SIGN_EXTEND insn will
706 be removed due to it matching a LOAD_EXTEND_OP load from memory,
707 or due to the operation being a no-op when applied to registers.
708 For example, if we have:
709
710 A: (set (reg:DI X) (sign_extend:DI (reg:SI Y)))
711 B: (... (subreg:SI (reg:DI X)) ...)
712
713 and mode_rep_extended says that Y is already sign-extended,
714 the backend will typically allow A to be combined with the
715 definition of Y or, failing that, allow A to be deleted after
716 reload through register tying. Introducing more uses of Y
717 prevents both optimisations. */
718 else if (is_a <scalar_int_mode> (use_mode, &int_use_mode)
719 && subreg_lowpart_p (use_reg))
720 {
721 if ((GET_CODE (src) == ZERO_EXTEND
722 || GET_CODE (src) == SIGN_EXTEND)
723 && is_a <scalar_int_mode> (GET_MODE (src), &src_mode)
724 && REG_P (XEXP (src, 0))
725 && REGNO (XEXP (src, 0)) >= FIRST_PSEUDO_REGISTER
726 && GET_MODE (XEXP (src, 0)) == use_mode
727 && !free_load_extend (src, def->insn ())
728 && (targetm.mode_rep_extended (int_use_mode, src_mode)
729 != (int) GET_CODE (src)))
730 return try_fwprop_subst (use, def, loc, use_reg, XEXP (src, 0));
731 }
732
733 return false;
734 }
735
736 /* Try to substitute (set DEST SRC), which defines DEF, into USE and simplify
737 the result, handling cases where DEST is used in a subreg and where
738 applying that subreg to SRC results in a useful simplification. */
739
740 static bool
forward_propagate_subreg(use_info * use,set_info * def,rtx dest,rtx src)741 forward_propagate_subreg (use_info *use, set_info *def, rtx dest, rtx src)
742 {
743 if (!use->includes_subregs () || !REG_P (dest))
744 return false;
745
746 if (GET_CODE (src) != SUBREG
747 && GET_CODE (src) != ZERO_EXTEND
748 && GET_CODE (src) != SIGN_EXTEND)
749 return false;
750
751 rtx_insn *use_rtl = use->insn ()->rtl ();
752 df_ref ref;
753
754 FOR_EACH_INSN_USE (ref, use_rtl)
755 if (DF_REF_REGNO (ref) == use->regno ()
756 && forward_propagate_subreg (use, def, dest, src, ref))
757 return true;
758
759 FOR_EACH_INSN_EQ_USE (ref, use_rtl)
760 if (DF_REF_REGNO (ref) == use->regno ()
761 && forward_propagate_subreg (use, def, dest, src, ref))
762 return true;
763
764 return false;
765 }
766
767 /* Try to substitute (set DEST SRC), which defines DEF, into USE and
768 simplify the result. */
769
770 static bool
forward_propagate_and_simplify(use_info * use,set_info * def,rtx dest,rtx src)771 forward_propagate_and_simplify (use_info *use, set_info *def,
772 rtx dest, rtx src)
773 {
774 insn_info *use_insn = use->insn ();
775 rtx_insn *use_rtl = use_insn->rtl ();
776 insn_info *def_insn = def->insn ();
777
778 /* ??? This check seems unnecessary. We should be able to propagate
779 into any kind of instruction, regardless of whether it's a single set.
780 It seems odd to be more permissive with asms than normal instructions. */
781 bool need_single_set = (!use_insn->is_asm () && !use_insn->is_debug_insn ());
782 rtx use_set = single_set (use_rtl);
783 if (need_single_set && !use_set)
784 return false;
785
786 /* Do not propagate into PC, CC0, etc.
787
788 ??? This too seems unnecessary. The current code should work correctly
789 without it, including cases where jumps become unconditional. */
790 if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode)
791 return false;
792
793 /* In __asm don't replace if src might need more registers than
794 reg, as that could increase register pressure on the __asm. */
795 if (use_insn->is_asm () && def_insn->uses ().size () > 1)
796 return false;
797
798 /* Check if the def is loading something from the constant pool; in this
799 case we would undo optimization such as compress_float_constant.
800 Still, we can set a REG_EQUAL note. */
801 if (MEM_P (src) && MEM_READONLY_P (src))
802 {
803 rtx x = avoid_constant_pool_reference (src);
804 rtx note_set;
805 if (x != src
806 && (note_set = set_for_reg_notes (use_rtl))
807 && REG_P (SET_DEST (note_set))
808 && !contains_paradoxical_subreg_p (SET_SRC (note_set)))
809 {
810 rtx note = find_reg_note (use_rtl, REG_EQUAL, NULL_RTX);
811 rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (note_set);
812 rtx new_rtx = simplify_replace_rtx (old_rtx, src, x);
813 if (old_rtx != new_rtx)
814 set_unique_reg_note (use_rtl, REG_EQUAL, copy_rtx (new_rtx));
815 }
816 return false;
817 }
818
819 /* ??? Unconditionally propagating into PATTERN would work better
820 for instructions that have match_dups. */
821 rtx *loc = need_single_set ? &use_set : &PATTERN (use_rtl);
822 return try_fwprop_subst (use, def, loc, dest, src);
823 }
824
825 /* Given a use USE of an insn, if it has a single reaching
826 definition, try to forward propagate it into that insn.
827 Return true if something changed.
828
829 REG_PROP_ONLY is true if we should only propagate register copies. */
830
831 static bool
832 forward_propagate_into (use_info *use, bool reg_prop_only = false)
833 {
834 if (use->includes_read_writes ())
835 return false;
836
837 /* Disregard uninitialized uses. */
838 set_info *def = use->def ();
839 if (!def)
840 return false;
841
842 /* Only consider single-register definitions. This could be relaxed,
843 but it should rarely be needed before RA. */
844 def = look_through_degenerate_phi (def);
845 if (def->includes_multiregs ())
846 return false;
847
848 /* Only consider uses whose definition comes from a real instruction. */
849 insn_info *def_insn = def->insn ();
850 if (def_insn->is_artificial ())
851 return false;
852
853 rtx_insn *def_rtl = def_insn->rtl ();
854 if (!NONJUMP_INSN_P (def_rtl))
855 return false;
856 /* ??? This seems an unnecessary restriction. We can easily tell
857 which set the definition comes from. */
858 if (multiple_sets (def_rtl))
859 return false;
860 rtx def_set = simple_regno_set (PATTERN (def_rtl), def->regno ());
861 if (!def_set)
862 return false;
863
864 rtx dest = SET_DEST (def_set);
865 rtx src = SET_SRC (def_set);
866
867 /* Allow propagations into a loop only for reg-to-reg copies, since
868 replacing one register by another shouldn't increase the cost. */
869 struct loop *def_loop = def_insn->bb ()->cfg_bb ()->loop_father;
870 struct loop *use_loop = use->bb ()->cfg_bb ()->loop_father;
871 if ((reg_prop_only || def_loop != use_loop)
872 && (!reg_single_def_p (dest) || !reg_single_def_p (src)))
873 return false;
874
875 /* Don't substitute into a non-local goto, this confuses CFG. */
876 insn_info *use_insn = use->insn ();
877 rtx_insn *use_rtl = use_insn->rtl ();
878 if (JUMP_P (use_rtl)
879 && find_reg_note (use_rtl, REG_NON_LOCAL_GOTO, NULL_RTX))
880 return false;
881
882 if (forward_propagate_and_simplify (use, def, dest, src)
883 || forward_propagate_subreg (use, def, dest, src))
884 return true;
885
886 return false;
887 }
888
889 static void
fwprop_init(void)890 fwprop_init (void)
891 {
892 num_changes = 0;
893 calculate_dominance_info (CDI_DOMINATORS);
894
895 /* We do not always want to propagate into loops, so we have to find
896 loops and be careful about them. Avoid CFG modifications so that
897 we don't have to update dominance information afterwards for
898 build_single_def_use_links. */
899 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
900
901 df_analyze ();
902 crtl->ssa = new rtl_ssa::function_info (cfun);
903 }
904
905 static void
fwprop_done(void)906 fwprop_done (void)
907 {
908 loop_optimizer_finalize ();
909
910 crtl->ssa->perform_pending_updates ();
911 free_dominance_info (CDI_DOMINATORS);
912 cleanup_cfg (0);
913
914 delete crtl->ssa;
915 crtl->ssa = nullptr;
916
917 delete_trivially_dead_insns (get_insns (), max_reg_num ());
918
919 if (dump_file)
920 fprintf (dump_file,
921 "\nNumber of successful forward propagations: %d\n\n",
922 num_changes);
923 }
924
925 /* Try to optimize INSN, returning true if something changes.
926 FWPROP_ADDR_P is true if we are running fwprop_addr rather than
927 the full fwprop. */
928
929 static bool
fwprop_insn(insn_info * insn,bool fwprop_addr_p)930 fwprop_insn (insn_info *insn, bool fwprop_addr_p)
931 {
932 for (use_info *use : insn->uses ())
933 {
934 if (use->is_mem ())
935 continue;
936 /* ??? The choices here follow those in the pre-SSA code. */
937 if (!use->includes_address_uses ())
938 {
939 if (forward_propagate_into (use, fwprop_addr_p))
940 return true;
941 }
942 else
943 {
944 struct loop *loop = insn->bb ()->cfg_bb ()->loop_father;
945 /* The outermost loop is not really a loop. */
946 if (loop == NULL || loop_outer (loop) == NULL)
947 {
948 if (forward_propagate_into (use, fwprop_addr_p))
949 return true;
950 }
951 else if (fwprop_addr_p)
952 {
953 if (forward_propagate_into (use, false))
954 return true;
955 }
956 }
957 }
958 return false;
959 }
960
961 /* Main entry point. */
962
963 static bool
gate_fwprop(void)964 gate_fwprop (void)
965 {
966 return optimize > 0 && flag_forward_propagate;
967 }
968
969 static unsigned int
fwprop(bool fwprop_addr_p)970 fwprop (bool fwprop_addr_p)
971 {
972 fwprop_init ();
973
974 /* Go through all the instructions (including debug instructions) looking
975 for uses that we could propagate into.
976
977 Do not forward propagate addresses into loops until after unrolling.
978 CSE did so because it was able to fix its own mess, but we are not. */
979
980 insn_info *next;
981
982 /* ??? This code uses a worklist in order to preserve the behavior
983 of the pre-SSA implementation. It would be better to instead
984 iterate on each instruction until no more propagations are
985 possible, then move on to the next. */
986 auto_vec<insn_info *> worklist;
987 for (insn_info *insn = crtl->ssa->first_insn (); insn; insn = next)
988 {
989 next = insn->next_any_insn ();
990 if (insn->can_be_optimized () || insn->is_debug_insn ())
991 if (fwprop_insn (insn, fwprop_addr_p))
992 worklist.safe_push (insn);
993 }
994 for (unsigned int i = 0; i < worklist.length (); ++i)
995 {
996 insn_info *insn = worklist[i];
997 if (fwprop_insn (insn, fwprop_addr_p))
998 worklist.safe_push (insn);
999 }
1000
1001 fwprop_done ();
1002 return 0;
1003 }
1004
1005 namespace {
1006
1007 const pass_data pass_data_rtl_fwprop =
1008 {
1009 RTL_PASS, /* type */
1010 "fwprop1", /* name */
1011 OPTGROUP_NONE, /* optinfo_flags */
1012 TV_FWPROP, /* tv_id */
1013 0, /* properties_required */
1014 0, /* properties_provided */
1015 0, /* properties_destroyed */
1016 0, /* todo_flags_start */
1017 TODO_df_finish, /* todo_flags_finish */
1018 };
1019
1020 class pass_rtl_fwprop : public rtl_opt_pass
1021 {
1022 public:
pass_rtl_fwprop(gcc::context * ctxt)1023 pass_rtl_fwprop (gcc::context *ctxt)
1024 : rtl_opt_pass (pass_data_rtl_fwprop, ctxt)
1025 {}
1026
1027 /* opt_pass methods: */
gate(function *)1028 virtual bool gate (function *) { return gate_fwprop (); }
execute(function *)1029 virtual unsigned int execute (function *) { return fwprop (false); }
1030
1031 }; // class pass_rtl_fwprop
1032
1033 } // anon namespace
1034
1035 rtl_opt_pass *
make_pass_rtl_fwprop(gcc::context * ctxt)1036 make_pass_rtl_fwprop (gcc::context *ctxt)
1037 {
1038 return new pass_rtl_fwprop (ctxt);
1039 }
1040
1041 namespace {
1042
1043 const pass_data pass_data_rtl_fwprop_addr =
1044 {
1045 RTL_PASS, /* type */
1046 "fwprop2", /* name */
1047 OPTGROUP_NONE, /* optinfo_flags */
1048 TV_FWPROP, /* tv_id */
1049 0, /* properties_required */
1050 0, /* properties_provided */
1051 0, /* properties_destroyed */
1052 0, /* todo_flags_start */
1053 TODO_df_finish, /* todo_flags_finish */
1054 };
1055
1056 class pass_rtl_fwprop_addr : public rtl_opt_pass
1057 {
1058 public:
pass_rtl_fwprop_addr(gcc::context * ctxt)1059 pass_rtl_fwprop_addr (gcc::context *ctxt)
1060 : rtl_opt_pass (pass_data_rtl_fwprop_addr, ctxt)
1061 {}
1062
1063 /* opt_pass methods: */
gate(function *)1064 virtual bool gate (function *) { return gate_fwprop (); }
execute(function *)1065 virtual unsigned int execute (function *) { return fwprop (true); }
1066
1067 }; // class pass_rtl_fwprop_addr
1068
1069 } // anon namespace
1070
1071 rtl_opt_pass *
make_pass_rtl_fwprop_addr(gcc::context * ctxt)1072 make_pass_rtl_fwprop_addr (gcc::context *ctxt)
1073 {
1074 return new pass_rtl_fwprop_addr (ctxt);
1075 }
1076