1 /* RTL-based forward propagation pass for GNU compiler.
2    Copyright (C) 2005-2021 Free Software Foundation, Inc.
3    Contributed by Paolo Bonzini and Steven Bosscher.
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #define INCLUDE_ALGORITHM
22 #define INCLUDE_FUNCTIONAL
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "rtl.h"
28 #include "df.h"
29 #include "rtl-ssa.h"
30 
31 #include "predict.h"
32 #include "cfgrtl.h"
33 #include "cfgcleanup.h"
34 #include "cfgloop.h"
35 #include "tree-pass.h"
36 #include "rtl-iter.h"
37 #include "target.h"
38 
39 /* This pass does simple forward propagation and simplification when an
40    operand of an insn can only come from a single def.  This pass uses
41    RTL SSA, so it is global.  However, we only do limited analysis of
42    available expressions.
43 
44    1) The pass tries to propagate the source of the def into the use,
45    and checks if the result is independent of the substituted value.
46    For example, the high word of a (zero_extend:DI (reg:SI M)) is always
47    zero, independent of the source register.
48 
49    In particular, we propagate constants into the use site.  Sometimes
50    RTL expansion did not put the constant in the same insn on purpose,
51    to satisfy a predicate, and the result will fail to be recognized;
52    but this happens rarely and in this case we can still create a
53    REG_EQUAL note.  For multi-word operations, this
54 
55       (set (subreg:SI (reg:DI 120) 0) (const_int 0))
56       (set (subreg:SI (reg:DI 120) 4) (const_int -1))
57       (set (subreg:SI (reg:DI 122) 0)
58 	 (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0)))
59       (set (subreg:SI (reg:DI 122) 4)
60 	 (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4)))
61 
62    can be simplified to the much simpler
63 
64       (set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119)))
65       (set (subreg:SI (reg:DI 122) 4) (const_int -1))
66 
67    This particular propagation is also effective at putting together
68    complex addressing modes.  We are more aggressive inside MEMs, in
69    that all definitions are propagated if the use is in a MEM; if the
70    result is a valid memory address we check address_cost to decide
71    whether the substitution is worthwhile.
72 
73    2) The pass propagates register copies.  This is not as effective as
74    the copy propagation done by CSE's canon_reg, which works by walking
75    the instruction chain, it can help the other transformations.
76 
77    We should consider removing this optimization, and instead reorder the
78    RTL passes, because GCSE does this transformation too.  With some luck,
79    the CSE pass at the end of rest_of_handle_gcse could also go away.
80 
81    3) The pass looks for paradoxical subregs that are actually unnecessary.
82    Things like this:
83 
84      (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
85      (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
86      (set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0)
87 				(subreg:SI (reg:QI 121) 0)))
88 
89    are very common on machines that can only do word-sized operations.
90    For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0),
91    if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0),
92    we can replace the paradoxical subreg with simply (reg:WIDE M).  The
93    above will simplify this to
94 
95      (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
96      (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
97      (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119)))
98 
99    where the first two insns are now dead.  */
100 
101 using namespace rtl_ssa;
102 
103 static int num_changes;
104 
105 /* Do not try to replace constant addresses or addresses of local and
106    argument slots.  These MEM expressions are made only once and inserted
107    in many instructions, as well as being used to control symbol table
108    output.  It is not safe to clobber them.
109 
110    There are some uncommon cases where the address is already in a register
111    for some reason, but we cannot take advantage of that because we have
112    no easy way to unshare the MEM.  In addition, looking up all stack
113    addresses is costly.  */
114 
115 static bool
can_simplify_addr(rtx addr)116 can_simplify_addr (rtx addr)
117 {
118   rtx reg;
119 
120   if (CONSTANT_ADDRESS_P (addr))
121     return false;
122 
123   if (GET_CODE (addr) == PLUS)
124     reg = XEXP (addr, 0);
125   else
126     reg = addr;
127 
128   return (!REG_P (reg)
129 	  || (REGNO (reg) != FRAME_POINTER_REGNUM
130 	      && REGNO (reg) != HARD_FRAME_POINTER_REGNUM
131 	      && REGNO (reg) != ARG_POINTER_REGNUM));
132 }
133 
134 /* MEM is the result of an address simplification, and temporarily
135    undoing changes OLD_NUM_CHANGES onwards restores the original address.
136    Return whether it is good to use the new address instead of the
137    old one.  INSN is the containing instruction.  */
138 
139 static bool
should_replace_address(int old_num_changes,rtx mem,rtx_insn * insn)140 should_replace_address (int old_num_changes, rtx mem, rtx_insn *insn)
141 {
142   int gain;
143 
144   /* Prefer the new address if it is less expensive.  */
145   bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
146   temporarily_undo_changes (old_num_changes);
147   gain = address_cost (XEXP (mem, 0), GET_MODE (mem),
148 		       MEM_ADDR_SPACE (mem), speed);
149   redo_changes (old_num_changes);
150   gain -= address_cost (XEXP (mem, 0), GET_MODE (mem),
151 			MEM_ADDR_SPACE (mem), speed);
152 
153   /* If the addresses have equivalent cost, prefer the new address
154      if it has the highest `set_src_cost'.  That has the potential of
155      eliminating the most insns without additional costs, and it
156      is the same that cse.c used to do.  */
157   if (gain == 0)
158     {
159       gain = set_src_cost (XEXP (mem, 0), VOIDmode, speed);
160       temporarily_undo_changes (old_num_changes);
161       gain -= set_src_cost (XEXP (mem, 0), VOIDmode, speed);
162       redo_changes (old_num_changes);
163     }
164 
165   return (gain > 0);
166 }
167 
168 
169 namespace
170 {
171   class fwprop_propagation : public insn_propagation
172   {
173   public:
174     static const uint16_t CHANGED_MEM = FIRST_SPARE_RESULT;
175     static const uint16_t CONSTANT = FIRST_SPARE_RESULT << 1;
176     static const uint16_t PROFITABLE = FIRST_SPARE_RESULT << 2;
177 
178     fwprop_propagation (insn_info *, set_info *, rtx, rtx);
179 
changed_mem_p()180     bool changed_mem_p () const { return result_flags & CHANGED_MEM; }
181     bool folded_to_constants_p () const;
182     bool profitable_p () const;
183 
184     bool check_mem (int, rtx) final override;
185     void note_simplification (int, uint16_t, rtx, rtx) final override;
186     uint16_t classify_result (rtx, rtx);
187 
188   private:
189     const bool single_use_p;
190     const bool single_ebb_p;
191   };
192 }
193 
194 /* Prepare to replace FROM with TO in USE_INSN.  */
195 
fwprop_propagation(insn_info * use_insn,set_info * def,rtx from,rtx to)196 fwprop_propagation::fwprop_propagation (insn_info *use_insn,
197 					set_info *def, rtx from, rtx to)
198   : insn_propagation (use_insn->rtl (), from, to),
199     single_use_p (def->single_nondebug_use ()),
200     single_ebb_p (use_insn->ebb () == def->ebb ())
201 {
202   should_check_mems = true;
203   should_note_simplifications = true;
204 }
205 
206 /* MEM is the result of an address simplification, and temporarily
207    undoing changes OLD_NUM_CHANGES onwards restores the original address.
208    Return true if the propagation should continue, false if it has failed.  */
209 
210 bool
check_mem(int old_num_changes,rtx mem)211 fwprop_propagation::check_mem (int old_num_changes, rtx mem)
212 {
213   if (!memory_address_addr_space_p (GET_MODE (mem), XEXP (mem, 0),
214 				    MEM_ADDR_SPACE (mem)))
215     {
216       failure_reason = "would create an invalid MEM";
217       return false;
218     }
219 
220   temporarily_undo_changes (old_num_changes);
221   bool can_simplify = can_simplify_addr (XEXP (mem, 0));
222   redo_changes (old_num_changes);
223   if (!can_simplify)
224     {
225       failure_reason = "would replace a frame address";
226       return false;
227     }
228 
229   /* Copy propagations are always ok.  Otherwise check the costs.  */
230   if (!(REG_P (from) && REG_P (to))
231       && !should_replace_address (old_num_changes, mem, insn))
232     {
233       failure_reason = "would increase the cost of a MEM";
234       return false;
235     }
236 
237   result_flags |= CHANGED_MEM;
238   return true;
239 }
240 
241 /* OLDX has been simplified to NEWX.  Describe the change in terms of
242    result_flags.  */
243 
244 uint16_t
classify_result(rtx old_rtx,rtx new_rtx)245 fwprop_propagation::classify_result (rtx old_rtx, rtx new_rtx)
246 {
247   if (CONSTANT_P (new_rtx))
248     {
249       /* If OLD_RTX is a LO_SUM, then it presumably exists for a reason,
250 	 and NEW_RTX is likely not a legitimate address.  We want it to
251 	 disappear if it is invalid.
252 
253 	 ??? Using the mode of the LO_SUM as the mode of the address
254 	 seems odd, but it was what the pre-SSA code did.  */
255       if (GET_CODE (old_rtx) == LO_SUM
256 	  && !memory_address_p (GET_MODE (old_rtx), new_rtx))
257 	return CONSTANT;
258       return CONSTANT | PROFITABLE;
259     }
260 
261   /* Allow replacements that simplify operations on a vector or complex
262      value to a component.  The most prominent case is
263      (subreg ([vec_]concat ...)).   */
264   if (REG_P (new_rtx)
265       && !HARD_REGISTER_P (new_rtx)
266       && (VECTOR_MODE_P (GET_MODE (from))
267 	  || COMPLEX_MODE_P (GET_MODE (from)))
268       && GET_MODE (new_rtx) == GET_MODE_INNER (GET_MODE (from)))
269     return PROFITABLE;
270 
271   /* Allow (subreg (mem)) -> (mem) simplifications with the following
272      exceptions:
273      1) Propagating (mem)s into multiple uses is not profitable.
274      2) Propagating (mem)s across EBBs may not be profitable if the source EBB
275 	runs less frequently.
276      3) Propagating (mem)s into paradoxical (subreg)s is not profitable.
277      4) Creating new (mem/v)s is not correct, since DCE will not remove the old
278 	ones.  */
279   if (single_use_p
280       && single_ebb_p
281       && SUBREG_P (old_rtx)
282       && !paradoxical_subreg_p (old_rtx)
283       && MEM_P (new_rtx)
284       && !MEM_VOLATILE_P (new_rtx))
285     return PROFITABLE;
286 
287   return 0;
288 }
289 
290 /* Record that OLD_RTX has been simplified to NEW_RTX.  OLD_NUM_CHANGES
291    is the number of unrelated changes that had been made before processing
292    OLD_RTX and its subrtxes.  OLD_RESULT_FLAGS is the value that result_flags
293    had at that point.  */
294 
295 void
note_simplification(int old_num_changes,uint16_t old_result_flags,rtx old_rtx,rtx new_rtx)296 fwprop_propagation::note_simplification (int old_num_changes,
297 					 uint16_t old_result_flags,
298 					 rtx old_rtx, rtx new_rtx)
299 {
300   result_flags &= ~(CONSTANT | PROFITABLE);
301   uint16_t new_flags = classify_result (old_rtx, new_rtx);
302   if (old_num_changes)
303     new_flags &= old_result_flags;
304   result_flags |= new_flags;
305 }
306 
307 /* Return true if all substitutions eventually folded to constants.  */
308 
309 bool
folded_to_constants_p()310 fwprop_propagation::folded_to_constants_p () const
311 {
312   /* If we're propagating a HIGH, require it to be folded with a
313      partnering LO_SUM.  For example, a REG_EQUAL note with a register
314      replaced by an unfolded HIGH is not useful.  */
315   if (CONSTANT_P (to) && GET_CODE (to) != HIGH)
316     return true;
317   return !(result_flags & UNSIMPLIFIED) && (result_flags & CONSTANT);
318 }
319 
320 
321 /* Return true if it is worth keeping the result of the propagation,
322    false if it would increase the complexity of the pattern too much.  */
323 
324 bool
profitable_p()325 fwprop_propagation::profitable_p () const
326 {
327   if (changed_mem_p ())
328     return true;
329 
330   if (!(result_flags & UNSIMPLIFIED)
331       && (result_flags & PROFITABLE))
332     return true;
333 
334   if (REG_P (to))
335     return true;
336 
337   if (GET_CODE (to) == SUBREG
338       && REG_P (SUBREG_REG (to))
339       && !paradoxical_subreg_p (to))
340     return true;
341 
342   if (CONSTANT_P (to))
343     return true;
344 
345   return false;
346 }
347 
348 /* Check that X has a single def.  */
349 
350 static bool
reg_single_def_p(rtx x)351 reg_single_def_p (rtx x)
352 {
353   return REG_P (x) && crtl->ssa->single_dominating_def (REGNO (x));
354 }
355 
356 /* Return true if X contains a paradoxical subreg.  */
357 
358 static bool
contains_paradoxical_subreg_p(rtx x)359 contains_paradoxical_subreg_p (rtx x)
360 {
361   subrtx_var_iterator::array_type array;
362   FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST)
363     {
364       x = *iter;
365       if (SUBREG_P (x) && paradoxical_subreg_p (x))
366 	return true;
367     }
368   return false;
369 }
370 
371 /* Try to substitute (set DEST SRC), which defines DEF, into note NOTE of
372    USE_INSN.  Return the number of substitutions on success, otherwise return
373    -1 and leave USE_INSN unchanged.
374 
375    If REQUIRE_CONSTANT is true, require all substituted occurrences of SRC
376    to fold to a constant, so that the note does not use any more registers
377    than it did previously.  If REQUIRE_CONSTANT is false, also allow the
378    substitution if it's something we'd normally allow for the main
379    instruction pattern.  */
380 
381 static int
try_fwprop_subst_note(insn_info * use_insn,set_info * def,rtx note,rtx dest,rtx src,bool require_constant)382 try_fwprop_subst_note (insn_info *use_insn, set_info *def,
383 		       rtx note, rtx dest, rtx src, bool require_constant)
384 {
385   rtx_insn *use_rtl = use_insn->rtl ();
386   insn_info *def_insn = def->insn ();
387 
388   insn_change_watermark watermark;
389   fwprop_propagation prop (use_insn, def, dest, src);
390   if (!prop.apply_to_rvalue (&XEXP (note, 0)))
391     {
392       if (dump_file && (dump_flags & TDF_DETAILS))
393 	fprintf (dump_file, "cannot propagate from insn %d into"
394 		 " notes of insn %d: %s\n", def_insn->uid (),
395 		 use_insn->uid (), prop.failure_reason);
396       return -1;
397     }
398 
399   if (prop.num_replacements == 0)
400     return 0;
401 
402   if (require_constant)
403     {
404       if (!prop.folded_to_constants_p ())
405 	{
406 	  if (dump_file && (dump_flags & TDF_DETAILS))
407 	    fprintf (dump_file, "cannot propagate from insn %d into"
408 		     " notes of insn %d: %s\n", def_insn->uid (),
409 		     use_insn->uid (), "wouldn't fold to constants");
410 	  return -1;
411 	}
412     }
413   else
414     {
415       if (!prop.folded_to_constants_p () && !prop.profitable_p ())
416 	{
417 	  if (dump_file && (dump_flags & TDF_DETAILS))
418 	    fprintf (dump_file, "cannot propagate from insn %d into"
419 		     " notes of insn %d: %s\n", def_insn->uid (),
420 		     use_insn->uid (), "would increase complexity of node");
421 	  return -1;
422 	}
423     }
424 
425   if (dump_file && (dump_flags & TDF_DETAILS))
426     {
427       fprintf (dump_file, "\nin notes of insn %d, replacing:\n  ",
428 	       INSN_UID (use_rtl));
429       temporarily_undo_changes (0);
430       print_inline_rtx (dump_file, note, 2);
431       redo_changes (0);
432       fprintf (dump_file, "\n with:\n  ");
433       print_inline_rtx (dump_file, note, 2);
434       fprintf (dump_file, "\n");
435     }
436   watermark.keep ();
437   return prop.num_replacements;
438 }
439 
440 /* Try to substitute (set DEST SRC), which defines DEF, into location LOC of
441    USE_INSN's pattern.  Return true on success, otherwise leave USE_INSN
442    unchanged.  */
443 
444 static bool
try_fwprop_subst_pattern(obstack_watermark & attempt,insn_change & use_change,set_info * def,rtx * loc,rtx dest,rtx src)445 try_fwprop_subst_pattern (obstack_watermark &attempt, insn_change &use_change,
446 			  set_info *def, rtx *loc, rtx dest, rtx src)
447 {
448   insn_info *use_insn = use_change.insn ();
449   rtx_insn *use_rtl = use_insn->rtl ();
450   insn_info *def_insn = def->insn ();
451 
452   insn_change_watermark watermark;
453   fwprop_propagation prop (use_insn, def, dest, src);
454   if (!prop.apply_to_pattern (loc))
455     {
456       if (dump_file && (dump_flags & TDF_DETAILS))
457 	fprintf (dump_file, "cannot propagate from insn %d into"
458 		 " insn %d: %s\n", def_insn->uid (), use_insn->uid (),
459 		 prop.failure_reason);
460       return false;
461     }
462 
463   if (prop.num_replacements == 0)
464     return false;
465 
466   if (!prop.profitable_p ())
467     {
468       if (dump_file && (dump_flags & TDF_DETAILS))
469 	fprintf (dump_file, "cannot propagate from insn %d into"
470 		 " insn %d: %s\n", def_insn->uid (), use_insn->uid (),
471 		 "would increase complexity of pattern");
472       return false;
473     }
474 
475   if (dump_file && (dump_flags & TDF_DETAILS))
476     {
477       fprintf (dump_file, "\npropagating insn %d into insn %d, replacing:\n",
478 	       def_insn->uid (), use_insn->uid ());
479       temporarily_undo_changes (0);
480       print_rtl_single (dump_file, PATTERN (use_rtl));
481       redo_changes (0);
482     }
483 
484   /* ??? In theory, it should be better to use insn costs rather than
485      set_src_costs here.  That would involve replacing this code with
486      change_is_worthwhile.  */
487   bool ok = recog (attempt, use_change);
488   if (ok && !prop.changed_mem_p () && !use_insn->is_asm ())
489     if (rtx use_set = single_set (use_rtl))
490       {
491 	bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_rtl));
492 	temporarily_undo_changes (0);
493 	auto old_cost = set_src_cost (SET_SRC (use_set),
494 				      GET_MODE (SET_DEST (use_set)), speed);
495 	redo_changes (0);
496 	auto new_cost = set_src_cost (SET_SRC (use_set),
497 				      GET_MODE (SET_DEST (use_set)), speed);
498 	if (new_cost > old_cost)
499 	  {
500 	    if (dump_file)
501 	      fprintf (dump_file, "change not profitable"
502 		       " (cost %d -> cost %d)\n", old_cost, new_cost);
503 	    ok = false;
504 	  }
505       }
506 
507   if (!ok)
508     {
509       /* The pattern didn't match, but if all uses of SRC folded to
510 	 constants, we can add a REG_EQUAL note for the result, if there
511 	 isn't one already.  */
512       if (!prop.folded_to_constants_p ())
513 	return false;
514 
515       /* Test this first to avoid creating an unnecessary copy of SRC.  */
516       if (find_reg_note (use_rtl, REG_EQUAL, NULL_RTX))
517 	return false;
518 
519       rtx set = set_for_reg_notes (use_rtl);
520       if (!set || !REG_P (SET_DEST (set)))
521 	return false;
522 
523       rtx value = copy_rtx (SET_SRC (set));
524       cancel_changes (0);
525 
526       /* If there are any paradoxical SUBREGs, drop the REG_EQUAL note,
527 	 because the bits in there can be anything and so might not
528 	 match the REG_EQUAL note content.  See PR70574.  */
529       if (contains_paradoxical_subreg_p (SET_SRC (set)))
530 	return false;
531 
532       if (dump_file && (dump_flags & TDF_DETAILS))
533 	fprintf (dump_file, " Setting REG_EQUAL note\n");
534 
535       return set_unique_reg_note (use_rtl, REG_EQUAL, value);
536     }
537 
538   rtx *note_ptr = &REG_NOTES (use_rtl);
539   while (rtx note = *note_ptr)
540     {
541       if ((REG_NOTE_KIND (note) == REG_EQUAL
542 	   || REG_NOTE_KIND (note) == REG_EQUIV)
543 	  && try_fwprop_subst_note (use_insn, def, note, dest, src, false) < 0)
544 	{
545 	  *note_ptr = XEXP (note, 1);
546 	  free_EXPR_LIST_node (note);
547 	}
548       else
549 	note_ptr = &XEXP (note, 1);
550     }
551 
552   confirm_change_group ();
553   crtl->ssa->change_insn (use_change);
554   num_changes++;
555   return true;
556 }
557 
558 /* Try to substitute (set DEST SRC), which defines DEF, into USE_INSN's notes,
559    given that it was not possible to do this for USE_INSN's main pattern.
560    Return true on success, otherwise leave USE_INSN unchanged.  */
561 
562 static bool
try_fwprop_subst_notes(insn_info * use_insn,set_info * def,rtx dest,rtx src)563 try_fwprop_subst_notes (insn_info *use_insn, set_info *def,
564 			rtx dest, rtx src)
565 {
566   rtx_insn *use_rtl = use_insn->rtl ();
567   for (rtx note = REG_NOTES (use_rtl); note; note = XEXP (note, 1))
568     if ((REG_NOTE_KIND (note) == REG_EQUAL
569 	 || REG_NOTE_KIND (note) == REG_EQUIV)
570 	&& try_fwprop_subst_note (use_insn, def, note, dest, src, true) > 0)
571       {
572 	confirm_change_group ();
573 	return true;
574       }
575 
576   return false;
577 }
578 
579 /* Check whether we could validly substitute (set DEST SRC), which defines DEF,
580    into USE.  If so, first try performing the substitution in location LOC
581    of USE->insn ()'s pattern.  If that fails, try instead to substitute
582    into the notes.
583 
584    Return true on success, otherwise leave USE_INSN unchanged.  */
585 
586 static bool
try_fwprop_subst(use_info * use,set_info * def,rtx * loc,rtx dest,rtx src)587 try_fwprop_subst (use_info *use, set_info *def,
588 		  rtx *loc, rtx dest, rtx src)
589 {
590   insn_info *use_insn = use->insn ();
591   insn_info *def_insn = def->insn ();
592 
593   auto attempt = crtl->ssa->new_change_attempt ();
594   use_array src_uses = remove_note_accesses (attempt, def_insn->uses ());
595 
596   /* ??? Not really a meaningful test: it means we can propagate arithmetic
597      involving hard registers but not bare references to them.  A better
598      test would be to iterate over src_uses looking for hard registers
599      that are not fixed.  */
600   if (REG_P (src) && HARD_REGISTER_P (src))
601     return false;
602 
603   /* ??? It would be better to make this EBB-based instead.  That would
604      involve checking for equal EBBs rather than equal BBs and trying
605      to make the uses available at use_insn->ebb ()->first_bb ().  */
606   if (def_insn->bb () != use_insn->bb ())
607     {
608       src_uses = crtl->ssa->make_uses_available (attempt, src_uses,
609 						 use_insn->bb ());
610       if (!src_uses.is_valid ())
611 	return false;
612     }
613 
614   insn_change use_change (use_insn);
615   use_change.new_uses = merge_access_arrays (attempt, use_change.new_uses,
616 					     src_uses);
617   if (!use_change.new_uses.is_valid ())
618     return false;
619 
620   /* ??? We could allow movement within the EBB by adding:
621 
622      use_change.move_range = use_insn->ebb ()->insn_range ();  */
623   if (!restrict_movement (use_change))
624     return false;
625 
626   return (try_fwprop_subst_pattern (attempt, use_change, def, loc, dest, src)
627 	  || try_fwprop_subst_notes (use_insn, def, dest, src));
628 }
629 
630 /* For the given single_set INSN, containing SRC known to be a
631    ZERO_EXTEND or SIGN_EXTEND of a register, return true if INSN
632    is redundant due to the register being set by a LOAD_EXTEND_OP
633    load from memory.  */
634 
635 static bool
free_load_extend(rtx src,insn_info * insn)636 free_load_extend (rtx src, insn_info *insn)
637 {
638   rtx reg = XEXP (src, 0);
639   if (load_extend_op (GET_MODE (reg)) != GET_CODE (src))
640     return false;
641 
642   def_info *def = nullptr;
643   for (use_info *use : insn->uses ())
644     if (use->regno () == REGNO (reg))
645       {
646 	def = use->def ();
647 	break;
648       }
649 
650   if (!def)
651     return false;
652 
653   insn_info *def_insn = def->insn ();
654   if (def_insn->is_artificial ())
655     return false;
656 
657   rtx_insn *def_rtl = def_insn->rtl ();
658   if (NONJUMP_INSN_P (def_rtl))
659     {
660       rtx patt = PATTERN (def_rtl);
661 
662       if (GET_CODE (patt) == SET
663 	  && GET_CODE (SET_SRC (patt)) == MEM
664 	  && rtx_equal_p (SET_DEST (patt), reg))
665 	return true;
666     }
667   return false;
668 }
669 
670 /* Subroutine of forward_propagate_subreg that handles a use of DEST
671    in REF.  The other parameters are the same.  */
672 
673 static bool
forward_propagate_subreg(use_info * use,set_info * def,rtx dest,rtx src,df_ref ref)674 forward_propagate_subreg (use_info *use, set_info *def,
675 			  rtx dest, rtx src, df_ref ref)
676 {
677   scalar_int_mode int_use_mode, src_mode;
678 
679   /* Only consider subregs... */
680   rtx use_reg = DF_REF_REG (ref);
681   machine_mode use_mode = GET_MODE (use_reg);
682   if (GET_CODE (use_reg) != SUBREG
683       || GET_MODE (SUBREG_REG (use_reg)) != GET_MODE (dest))
684     return false;
685 
686   /* ??? Replacing throughout the pattern would help for match_dups.  */
687   rtx *loc = DF_REF_LOC (ref);
688   if (paradoxical_subreg_p (use_reg))
689     {
690       /* If this is a paradoxical SUBREG, we have no idea what value the
691 	 extra bits would have.  However, if the operand is equivalent to
692 	 a SUBREG whose operand is the same as our mode, and all the modes
693 	 are within a word, we can just use the inner operand because
694 	 these SUBREGs just say how to treat the register.  */
695       if (GET_CODE (src) == SUBREG
696 	  && REG_P (SUBREG_REG (src))
697 	  && REGNO (SUBREG_REG (src)) >= FIRST_PSEUDO_REGISTER
698 	  && GET_MODE (SUBREG_REG (src)) == use_mode
699 	  && subreg_lowpart_p (src))
700 	return try_fwprop_subst (use, def, loc, use_reg, SUBREG_REG (src));
701     }
702 
703   /* If this is a SUBREG of a ZERO_EXTEND or SIGN_EXTEND, and the SUBREG
704      is the low part of the reg being extended then just use the inner
705      operand.  Don't do this if the ZERO_EXTEND or SIGN_EXTEND insn will
706      be removed due to it matching a LOAD_EXTEND_OP load from memory,
707      or due to the operation being a no-op when applied to registers.
708      For example, if we have:
709 
710 	 A: (set (reg:DI X) (sign_extend:DI (reg:SI Y)))
711 	 B: (... (subreg:SI (reg:DI X)) ...)
712 
713      and mode_rep_extended says that Y is already sign-extended,
714      the backend will typically allow A to be combined with the
715      definition of Y or, failing that, allow A to be deleted after
716      reload through register tying.  Introducing more uses of Y
717      prevents both optimisations.  */
718   else if (is_a <scalar_int_mode> (use_mode, &int_use_mode)
719 	   && subreg_lowpart_p (use_reg))
720     {
721       if ((GET_CODE (src) == ZERO_EXTEND
722 	   || GET_CODE (src) == SIGN_EXTEND)
723 	  && is_a <scalar_int_mode> (GET_MODE (src), &src_mode)
724 	  && REG_P (XEXP (src, 0))
725 	  && REGNO (XEXP (src, 0)) >= FIRST_PSEUDO_REGISTER
726 	  && GET_MODE (XEXP (src, 0)) == use_mode
727 	  && !free_load_extend (src, def->insn ())
728 	  && (targetm.mode_rep_extended (int_use_mode, src_mode)
729 	      != (int) GET_CODE (src)))
730 	return try_fwprop_subst (use, def, loc, use_reg, XEXP (src, 0));
731     }
732 
733   return false;
734 }
735 
736 /* Try to substitute (set DEST SRC), which defines DEF, into USE and simplify
737    the result, handling cases where DEST is used in a subreg and where
738    applying that subreg to SRC results in a useful simplification.  */
739 
740 static bool
forward_propagate_subreg(use_info * use,set_info * def,rtx dest,rtx src)741 forward_propagate_subreg (use_info *use, set_info *def, rtx dest, rtx src)
742 {
743   if (!use->includes_subregs () || !REG_P (dest))
744     return false;
745 
746   if (GET_CODE (src) != SUBREG
747       && GET_CODE (src) != ZERO_EXTEND
748       && GET_CODE (src) != SIGN_EXTEND)
749     return false;
750 
751   rtx_insn *use_rtl = use->insn ()->rtl ();
752   df_ref ref;
753 
754   FOR_EACH_INSN_USE (ref, use_rtl)
755     if (DF_REF_REGNO (ref) == use->regno ()
756 	&& forward_propagate_subreg (use, def, dest, src, ref))
757       return true;
758 
759   FOR_EACH_INSN_EQ_USE (ref, use_rtl)
760     if (DF_REF_REGNO (ref) == use->regno ()
761 	&& forward_propagate_subreg (use, def, dest, src, ref))
762       return true;
763 
764   return false;
765 }
766 
767 /* Try to substitute (set DEST SRC), which defines DEF, into USE and
768    simplify the result.  */
769 
770 static bool
forward_propagate_and_simplify(use_info * use,set_info * def,rtx dest,rtx src)771 forward_propagate_and_simplify (use_info *use, set_info *def,
772 				rtx dest, rtx src)
773 {
774   insn_info *use_insn = use->insn ();
775   rtx_insn *use_rtl = use_insn->rtl ();
776   insn_info *def_insn = def->insn ();
777 
778   /* ??? This check seems unnecessary.  We should be able to propagate
779      into any kind of instruction, regardless of whether it's a single set.
780      It seems odd to be more permissive with asms than normal instructions.  */
781   bool need_single_set = (!use_insn->is_asm () && !use_insn->is_debug_insn ());
782   rtx use_set = single_set (use_rtl);
783   if (need_single_set && !use_set)
784     return false;
785 
786   /* Do not propagate into PC, CC0, etc.
787 
788      ??? This too seems unnecessary.  The current code should work correctly
789      without it, including cases where jumps become unconditional.  */
790   if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode)
791     return false;
792 
793   /* In __asm don't replace if src might need more registers than
794      reg, as that could increase register pressure on the __asm.  */
795   if (use_insn->is_asm () && def_insn->uses ().size () > 1)
796     return false;
797 
798   /* Check if the def is loading something from the constant pool; in this
799      case we would undo optimization such as compress_float_constant.
800      Still, we can set a REG_EQUAL note.  */
801   if (MEM_P (src) && MEM_READONLY_P (src))
802     {
803       rtx x = avoid_constant_pool_reference (src);
804       rtx note_set;
805       if (x != src
806 	  && (note_set = set_for_reg_notes (use_rtl))
807 	  && REG_P (SET_DEST (note_set))
808 	  && !contains_paradoxical_subreg_p (SET_SRC (note_set)))
809 	{
810 	  rtx note = find_reg_note (use_rtl, REG_EQUAL, NULL_RTX);
811 	  rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (note_set);
812 	  rtx new_rtx = simplify_replace_rtx (old_rtx, src, x);
813 	  if (old_rtx != new_rtx)
814 	    set_unique_reg_note (use_rtl, REG_EQUAL, copy_rtx (new_rtx));
815 	}
816       return false;
817     }
818 
819   /* ??? Unconditionally propagating into PATTERN would work better
820      for instructions that have match_dups.  */
821   rtx *loc = need_single_set ? &use_set : &PATTERN (use_rtl);
822   return try_fwprop_subst (use, def, loc, dest, src);
823 }
824 
825 /* Given a use USE of an insn, if it has a single reaching
826    definition, try to forward propagate it into that insn.
827    Return true if something changed.
828 
829    REG_PROP_ONLY is true if we should only propagate register copies.  */
830 
831 static bool
832 forward_propagate_into (use_info *use, bool reg_prop_only = false)
833 {
834   if (use->includes_read_writes ())
835     return false;
836 
837   /* Disregard uninitialized uses.  */
838   set_info *def = use->def ();
839   if (!def)
840     return false;
841 
842   /* Only consider single-register definitions.  This could be relaxed,
843      but it should rarely be needed before RA.  */
844   def = look_through_degenerate_phi (def);
845   if (def->includes_multiregs ())
846     return false;
847 
848   /* Only consider uses whose definition comes from a real instruction.  */
849   insn_info *def_insn = def->insn ();
850   if (def_insn->is_artificial ())
851     return false;
852 
853   rtx_insn *def_rtl = def_insn->rtl ();
854   if (!NONJUMP_INSN_P (def_rtl))
855     return false;
856   /* ??? This seems an unnecessary restriction.  We can easily tell
857      which set the definition comes from.  */
858   if (multiple_sets (def_rtl))
859     return false;
860   rtx def_set = simple_regno_set (PATTERN (def_rtl), def->regno ());
861   if (!def_set)
862     return false;
863 
864   rtx dest = SET_DEST (def_set);
865   rtx src = SET_SRC (def_set);
866 
867   /* Allow propagations into a loop only for reg-to-reg copies, since
868      replacing one register by another shouldn't increase the cost.  */
869   struct loop *def_loop = def_insn->bb ()->cfg_bb ()->loop_father;
870   struct loop *use_loop = use->bb ()->cfg_bb ()->loop_father;
871   if ((reg_prop_only || def_loop != use_loop)
872       && (!reg_single_def_p (dest) || !reg_single_def_p (src)))
873     return false;
874 
875   /* Don't substitute into a non-local goto, this confuses CFG.  */
876   insn_info *use_insn = use->insn ();
877   rtx_insn *use_rtl = use_insn->rtl ();
878   if (JUMP_P (use_rtl)
879       && find_reg_note (use_rtl, REG_NON_LOCAL_GOTO, NULL_RTX))
880     return false;
881 
882   if (forward_propagate_and_simplify (use, def, dest, src)
883       || forward_propagate_subreg (use, def, dest, src))
884     return true;
885 
886   return false;
887 }
888 
889 static void
fwprop_init(void)890 fwprop_init (void)
891 {
892   num_changes = 0;
893   calculate_dominance_info (CDI_DOMINATORS);
894 
895   /* We do not always want to propagate into loops, so we have to find
896      loops and be careful about them.  Avoid CFG modifications so that
897      we don't have to update dominance information afterwards for
898      build_single_def_use_links.  */
899   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
900 
901   df_analyze ();
902   crtl->ssa = new rtl_ssa::function_info (cfun);
903 }
904 
905 static void
fwprop_done(void)906 fwprop_done (void)
907 {
908   loop_optimizer_finalize ();
909 
910   crtl->ssa->perform_pending_updates ();
911   free_dominance_info (CDI_DOMINATORS);
912   cleanup_cfg (0);
913 
914   delete crtl->ssa;
915   crtl->ssa = nullptr;
916 
917   delete_trivially_dead_insns (get_insns (), max_reg_num ());
918 
919   if (dump_file)
920     fprintf (dump_file,
921 	     "\nNumber of successful forward propagations: %d\n\n",
922 	     num_changes);
923 }
924 
925 /* Try to optimize INSN, returning true if something changes.
926    FWPROP_ADDR_P is true if we are running fwprop_addr rather than
927    the full fwprop.  */
928 
929 static bool
fwprop_insn(insn_info * insn,bool fwprop_addr_p)930 fwprop_insn (insn_info *insn, bool fwprop_addr_p)
931 {
932   for (use_info *use : insn->uses ())
933     {
934       if (use->is_mem ())
935 	continue;
936       /* ??? The choices here follow those in the pre-SSA code.  */
937       if (!use->includes_address_uses ())
938 	{
939 	  if (forward_propagate_into (use, fwprop_addr_p))
940 	    return true;
941 	}
942       else
943 	{
944 	  struct loop *loop = insn->bb ()->cfg_bb ()->loop_father;
945 	  /* The outermost loop is not really a loop.  */
946 	  if (loop == NULL || loop_outer (loop) == NULL)
947 	    {
948 	      if (forward_propagate_into (use, fwprop_addr_p))
949 		return true;
950 	    }
951 	  else if (fwprop_addr_p)
952 	    {
953 	      if (forward_propagate_into (use, false))
954 		return true;
955 	    }
956 	}
957     }
958   return false;
959 }
960 
961 /* Main entry point.  */
962 
963 static bool
gate_fwprop(void)964 gate_fwprop (void)
965 {
966   return optimize > 0 && flag_forward_propagate;
967 }
968 
969 static unsigned int
fwprop(bool fwprop_addr_p)970 fwprop (bool fwprop_addr_p)
971 {
972   fwprop_init ();
973 
974   /* Go through all the instructions (including debug instructions) looking
975      for uses that we could propagate into.
976 
977      Do not forward propagate addresses into loops until after unrolling.
978      CSE did so because it was able to fix its own mess, but we are not.  */
979 
980   insn_info *next;
981 
982   /* ??? This code uses a worklist in order to preserve the behavior
983      of the pre-SSA implementation.  It would be better to instead
984      iterate on each instruction until no more propagations are
985      possible, then move on to the next.  */
986   auto_vec<insn_info *> worklist;
987   for (insn_info *insn = crtl->ssa->first_insn (); insn; insn = next)
988     {
989       next = insn->next_any_insn ();
990       if (insn->can_be_optimized () || insn->is_debug_insn ())
991 	if (fwprop_insn (insn, fwprop_addr_p))
992 	  worklist.safe_push (insn);
993     }
994   for (unsigned int i = 0; i < worklist.length (); ++i)
995     {
996       insn_info *insn = worklist[i];
997       if (fwprop_insn (insn, fwprop_addr_p))
998 	worklist.safe_push (insn);
999     }
1000 
1001   fwprop_done ();
1002   return 0;
1003 }
1004 
1005 namespace {
1006 
1007 const pass_data pass_data_rtl_fwprop =
1008 {
1009   RTL_PASS, /* type */
1010   "fwprop1", /* name */
1011   OPTGROUP_NONE, /* optinfo_flags */
1012   TV_FWPROP, /* tv_id */
1013   0, /* properties_required */
1014   0, /* properties_provided */
1015   0, /* properties_destroyed */
1016   0, /* todo_flags_start */
1017   TODO_df_finish, /* todo_flags_finish */
1018 };
1019 
1020 class pass_rtl_fwprop : public rtl_opt_pass
1021 {
1022 public:
pass_rtl_fwprop(gcc::context * ctxt)1023   pass_rtl_fwprop (gcc::context *ctxt)
1024     : rtl_opt_pass (pass_data_rtl_fwprop, ctxt)
1025   {}
1026 
1027   /* opt_pass methods: */
gate(function *)1028   virtual bool gate (function *) { return gate_fwprop (); }
execute(function *)1029   virtual unsigned int execute (function *) { return fwprop (false); }
1030 
1031 }; // class pass_rtl_fwprop
1032 
1033 } // anon namespace
1034 
1035 rtl_opt_pass *
make_pass_rtl_fwprop(gcc::context * ctxt)1036 make_pass_rtl_fwprop (gcc::context *ctxt)
1037 {
1038   return new pass_rtl_fwprop (ctxt);
1039 }
1040 
1041 namespace {
1042 
1043 const pass_data pass_data_rtl_fwprop_addr =
1044 {
1045   RTL_PASS, /* type */
1046   "fwprop2", /* name */
1047   OPTGROUP_NONE, /* optinfo_flags */
1048   TV_FWPROP, /* tv_id */
1049   0, /* properties_required */
1050   0, /* properties_provided */
1051   0, /* properties_destroyed */
1052   0, /* todo_flags_start */
1053   TODO_df_finish, /* todo_flags_finish */
1054 };
1055 
1056 class pass_rtl_fwprop_addr : public rtl_opt_pass
1057 {
1058 public:
pass_rtl_fwprop_addr(gcc::context * ctxt)1059   pass_rtl_fwprop_addr (gcc::context *ctxt)
1060     : rtl_opt_pass (pass_data_rtl_fwprop_addr, ctxt)
1061   {}
1062 
1063   /* opt_pass methods: */
gate(function *)1064   virtual bool gate (function *) { return gate_fwprop (); }
execute(function *)1065   virtual unsigned int execute (function *) { return fwprop (true); }
1066 
1067 }; // class pass_rtl_fwprop_addr
1068 
1069 } // anon namespace
1070 
1071 rtl_opt_pass *
make_pass_rtl_fwprop_addr(gcc::context * ctxt)1072 make_pass_rtl_fwprop_addr (gcc::context *ctxt)
1073 {
1074   return new pass_rtl_fwprop_addr (ctxt);
1075 }
1076