1 /* RTL-based forward propagation pass for GNU compiler.
2    Copyright (C) 2005-2016 Free Software Foundation, Inc.
3    Contributed by Paolo Bonzini and Steven Bosscher.
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "predict.h"
28 #include "df.h"
29 #include "tm_p.h"
30 #include "insn-config.h"
31 #include "emit-rtl.h"
32 #include "recog.h"
33 
34 #include "sparseset.h"
35 #include "cfgrtl.h"
36 #include "cfgcleanup.h"
37 #include "cfgloop.h"
38 #include "tree-pass.h"
39 #include "domwalk.h"
40 #include "rtl-iter.h"
41 
42 
43 /* This pass does simple forward propagation and simplification when an
44    operand of an insn can only come from a single def.  This pass uses
45    df.c, so it is global.  However, we only do limited analysis of
46    available expressions.
47 
48    1) The pass tries to propagate the source of the def into the use,
49    and checks if the result is independent of the substituted value.
50    For example, the high word of a (zero_extend:DI (reg:SI M)) is always
51    zero, independent of the source register.
52 
53    In particular, we propagate constants into the use site.  Sometimes
54    RTL expansion did not put the constant in the same insn on purpose,
55    to satisfy a predicate, and the result will fail to be recognized;
56    but this happens rarely and in this case we can still create a
57    REG_EQUAL note.  For multi-word operations, this
58 
59       (set (subreg:SI (reg:DI 120) 0) (const_int 0))
60       (set (subreg:SI (reg:DI 120) 4) (const_int -1))
61       (set (subreg:SI (reg:DI 122) 0)
62          (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0)))
63       (set (subreg:SI (reg:DI 122) 4)
64          (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4)))
65 
66    can be simplified to the much simpler
67 
68       (set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119)))
69       (set (subreg:SI (reg:DI 122) 4) (const_int -1))
70 
71    This particular propagation is also effective at putting together
72    complex addressing modes.  We are more aggressive inside MEMs, in
73    that all definitions are propagated if the use is in a MEM; if the
74    result is a valid memory address we check address_cost to decide
75    whether the substitution is worthwhile.
76 
77    2) The pass propagates register copies.  This is not as effective as
78    the copy propagation done by CSE's canon_reg, which works by walking
79    the instruction chain, it can help the other transformations.
80 
81    We should consider removing this optimization, and instead reorder the
82    RTL passes, because GCSE does this transformation too.  With some luck,
83    the CSE pass at the end of rest_of_handle_gcse could also go away.
84 
85    3) The pass looks for paradoxical subregs that are actually unnecessary.
86    Things like this:
87 
88      (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
89      (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
90      (set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0)
91                                 (subreg:SI (reg:QI 121) 0)))
92 
93    are very common on machines that can only do word-sized operations.
94    For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0),
95    if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0),
96    we can replace the paradoxical subreg with simply (reg:WIDE M).  The
97    above will simplify this to
98 
99      (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
100      (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
101      (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119)))
102 
103    where the first two insns are now dead.
104 
105    We used to use reaching definitions to find which uses have a
106    single reaching definition (sounds obvious...), but this is too
107    complex a problem in nasty testcases like PR33928.  Now we use the
108    multiple definitions problem in df-problems.c.  The similarity
109    between that problem and SSA form creation is taken further, in
110    that fwprop does a dominator walk to create its chains; however,
111    instead of creating a PHI function where multiple definitions meet
112    I just punt and record only singleton use-def chains, which is
113    all that is needed by fwprop.  */
114 
115 
116 static int num_changes;
117 
118 static vec<df_ref> use_def_ref;
119 static vec<df_ref> reg_defs;
120 static vec<df_ref> reg_defs_stack;
121 
122 /* The MD bitmaps are trimmed to include only live registers to cut
123    memory usage on testcases like insn-recog.c.  Track live registers
124    in the basic block and do not perform forward propagation if the
125    destination is a dead pseudo occurring in a note.  */
126 static bitmap local_md;
127 static bitmap local_lr;
128 
129 /* Return the only def in USE's use-def chain, or NULL if there is
130    more than one def in the chain.  */
131 
132 static inline df_ref
get_def_for_use(df_ref use)133 get_def_for_use (df_ref use)
134 {
135   return use_def_ref[DF_REF_ID (use)];
136 }
137 
138 
139 /* Update the reg_defs vector with non-partial definitions in DEF_REC.
140    TOP_FLAG says which artificials uses should be used, when DEF_REC
141    is an artificial def vector.  LOCAL_MD is modified as after a
142    df_md_simulate_* function; we do more or less the same processing
143    done there, so we do not use those functions.  */
144 
145 #define DF_MD_GEN_FLAGS \
146 	(DF_REF_PARTIAL | DF_REF_CONDITIONAL | DF_REF_MAY_CLOBBER)
147 
148 static void
process_defs(df_ref def,int top_flag)149 process_defs (df_ref def, int top_flag)
150 {
151   for (; def; def = DF_REF_NEXT_LOC (def))
152     {
153       df_ref curr_def = reg_defs[DF_REF_REGNO (def)];
154       unsigned int dregno;
155 
156       if ((DF_REF_FLAGS (def) & DF_REF_AT_TOP) != top_flag)
157 	continue;
158 
159       dregno = DF_REF_REGNO (def);
160       if (curr_def)
161 	reg_defs_stack.safe_push (curr_def);
162       else
163 	{
164 	  /* Do not store anything if "transitioning" from NULL to NULL.  But
165              otherwise, push a special entry on the stack to tell the
166 	     leave_block callback that the entry in reg_defs was NULL.  */
167 	  if (DF_REF_FLAGS (def) & DF_MD_GEN_FLAGS)
168 	    ;
169 	  else
170 	    reg_defs_stack.safe_push (def);
171 	}
172 
173       if (DF_REF_FLAGS (def) & DF_MD_GEN_FLAGS)
174 	{
175 	  bitmap_set_bit (local_md, dregno);
176 	  reg_defs[dregno] = NULL;
177 	}
178       else
179 	{
180 	  bitmap_clear_bit (local_md, dregno);
181 	  reg_defs[dregno] = def;
182 	}
183     }
184 }
185 
186 
187 /* Fill the use_def_ref vector with values for the uses in USE_REC,
188    taking reaching definitions info from LOCAL_MD and REG_DEFS.
189    TOP_FLAG says which artificials uses should be used, when USE_REC
190    is an artificial use vector.  */
191 
192 static void
process_uses(df_ref use,int top_flag)193 process_uses (df_ref use, int top_flag)
194 {
195   for (; use; use = DF_REF_NEXT_LOC (use))
196     if ((DF_REF_FLAGS (use) & DF_REF_AT_TOP) == top_flag)
197       {
198         unsigned int uregno = DF_REF_REGNO (use);
199         if (reg_defs[uregno]
200 	    && !bitmap_bit_p (local_md, uregno)
201 	    && bitmap_bit_p (local_lr, uregno))
202 	  use_def_ref[DF_REF_ID (use)] = reg_defs[uregno];
203       }
204 }
205 
206 class single_def_use_dom_walker : public dom_walker
207 {
208 public:
single_def_use_dom_walker(cdi_direction direction)209   single_def_use_dom_walker (cdi_direction direction)
210     : dom_walker (direction) {}
211   virtual edge before_dom_children (basic_block);
212   virtual void after_dom_children (basic_block);
213 };
214 
215 edge
before_dom_children(basic_block bb)216 single_def_use_dom_walker::before_dom_children (basic_block bb)
217 {
218   int bb_index = bb->index;
219   struct df_md_bb_info *md_bb_info = df_md_get_bb_info (bb_index);
220   struct df_lr_bb_info *lr_bb_info = df_lr_get_bb_info (bb_index);
221   rtx_insn *insn;
222 
223   bitmap_copy (local_md, &md_bb_info->in);
224   bitmap_copy (local_lr, &lr_bb_info->in);
225 
226   /* Push a marker for the leave_block callback.  */
227   reg_defs_stack.safe_push (NULL);
228 
229   process_uses (df_get_artificial_uses (bb_index), DF_REF_AT_TOP);
230   process_defs (df_get_artificial_defs (bb_index), DF_REF_AT_TOP);
231 
232   /* We don't call df_simulate_initialize_forwards, as it may overestimate
233      the live registers if there are unused artificial defs.  We prefer
234      liveness to be underestimated.  */
235 
236   FOR_BB_INSNS (bb, insn)
237     if (INSN_P (insn))
238       {
239         unsigned int uid = INSN_UID (insn);
240         process_uses (DF_INSN_UID_USES (uid), 0);
241         process_uses (DF_INSN_UID_EQ_USES (uid), 0);
242         process_defs (DF_INSN_UID_DEFS (uid), 0);
243 	df_simulate_one_insn_forwards (bb, insn, local_lr);
244       }
245 
246   process_uses (df_get_artificial_uses (bb_index), 0);
247   process_defs (df_get_artificial_defs (bb_index), 0);
248 
249   return NULL;
250 }
251 
252 /* Pop the definitions created in this basic block when leaving its
253    dominated parts.  */
254 
255 void
after_dom_children(basic_block bb ATTRIBUTE_UNUSED)256 single_def_use_dom_walker::after_dom_children (basic_block bb ATTRIBUTE_UNUSED)
257 {
258   df_ref saved_def;
259   while ((saved_def = reg_defs_stack.pop ()) != NULL)
260     {
261       unsigned int dregno = DF_REF_REGNO (saved_def);
262 
263       /* See also process_defs.  */
264       if (saved_def == reg_defs[dregno])
265 	reg_defs[dregno] = NULL;
266       else
267 	reg_defs[dregno] = saved_def;
268     }
269 }
270 
271 
272 /* Build a vector holding the reaching definitions of uses reached by a
273    single dominating definition.  */
274 
275 static void
build_single_def_use_links(void)276 build_single_def_use_links (void)
277 {
278   /* We use the multiple definitions problem to compute our restricted
279      use-def chains.  */
280   df_set_flags (DF_EQ_NOTES);
281   df_md_add_problem ();
282   df_note_add_problem ();
283   df_analyze ();
284   df_maybe_reorganize_use_refs (DF_REF_ORDER_BY_INSN_WITH_NOTES);
285 
286   use_def_ref.create (DF_USES_TABLE_SIZE ());
287   use_def_ref.safe_grow_cleared (DF_USES_TABLE_SIZE ());
288 
289   reg_defs.create (max_reg_num ());
290   reg_defs.safe_grow_cleared (max_reg_num ());
291 
292   reg_defs_stack.create (n_basic_blocks_for_fn (cfun) * 10);
293   local_md = BITMAP_ALLOC (NULL);
294   local_lr = BITMAP_ALLOC (NULL);
295 
296   /* Walk the dominator tree looking for single reaching definitions
297      dominating the uses.  This is similar to how SSA form is built.  */
298   single_def_use_dom_walker (CDI_DOMINATORS)
299     .walk (cfun->cfg->x_entry_block_ptr);
300 
301   BITMAP_FREE (local_lr);
302   BITMAP_FREE (local_md);
303   reg_defs.release ();
304   reg_defs_stack.release ();
305 }
306 
307 
308 /* Do not try to replace constant addresses or addresses of local and
309    argument slots.  These MEM expressions are made only once and inserted
310    in many instructions, as well as being used to control symbol table
311    output.  It is not safe to clobber them.
312 
313    There are some uncommon cases where the address is already in a register
314    for some reason, but we cannot take advantage of that because we have
315    no easy way to unshare the MEM.  In addition, looking up all stack
316    addresses is costly.  */
317 
318 static bool
can_simplify_addr(rtx addr)319 can_simplify_addr (rtx addr)
320 {
321   rtx reg;
322 
323   if (CONSTANT_ADDRESS_P (addr))
324     return false;
325 
326   if (GET_CODE (addr) == PLUS)
327     reg = XEXP (addr, 0);
328   else
329     reg = addr;
330 
331   return (!REG_P (reg)
332 	  || (REGNO (reg) != FRAME_POINTER_REGNUM
333 	      && REGNO (reg) != HARD_FRAME_POINTER_REGNUM
334 	      && REGNO (reg) != ARG_POINTER_REGNUM));
335 }
336 
337 /* Returns a canonical version of X for the address, from the point of view,
338    that all multiplications are represented as MULT instead of the multiply
339    by a power of 2 being represented as ASHIFT.
340 
341    Every ASHIFT we find has been made by simplify_gen_binary and was not
342    there before, so it is not shared.  So we can do this in place.  */
343 
344 static void
canonicalize_address(rtx x)345 canonicalize_address (rtx x)
346 {
347   for (;;)
348     switch (GET_CODE (x))
349       {
350       case ASHIFT:
351         if (CONST_INT_P (XEXP (x, 1))
352             && INTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (GET_MODE (x))
353             && INTVAL (XEXP (x, 1)) >= 0)
354 	  {
355 	    HOST_WIDE_INT shift = INTVAL (XEXP (x, 1));
356 	    PUT_CODE (x, MULT);
357 	    XEXP (x, 1) = gen_int_mode ((HOST_WIDE_INT) 1 << shift,
358 					GET_MODE (x));
359 	  }
360 
361 	x = XEXP (x, 0);
362         break;
363 
364       case PLUS:
365         if (GET_CODE (XEXP (x, 0)) == PLUS
366 	    || GET_CODE (XEXP (x, 0)) == ASHIFT
367 	    || GET_CODE (XEXP (x, 0)) == CONST)
368 	  canonicalize_address (XEXP (x, 0));
369 
370 	x = XEXP (x, 1);
371         break;
372 
373       case CONST:
374 	x = XEXP (x, 0);
375         break;
376 
377       default:
378         return;
379       }
380 }
381 
382 /* OLD is a memory address.  Return whether it is good to use NEW instead,
383    for a memory access in the given MODE.  */
384 
385 static bool
should_replace_address(rtx old_rtx,rtx new_rtx,machine_mode mode,addr_space_t as,bool speed)386 should_replace_address (rtx old_rtx, rtx new_rtx, machine_mode mode,
387 			addr_space_t as, bool speed)
388 {
389   int gain;
390 
391   if (rtx_equal_p (old_rtx, new_rtx)
392       || !memory_address_addr_space_p (mode, new_rtx, as))
393     return false;
394 
395   /* Copy propagation is always ok.  */
396   if (REG_P (old_rtx) && REG_P (new_rtx))
397     return true;
398 
399   /* Prefer the new address if it is less expensive.  */
400   gain = (address_cost (old_rtx, mode, as, speed)
401 	  - address_cost (new_rtx, mode, as, speed));
402 
403   /* If the addresses have equivalent cost, prefer the new address
404      if it has the highest `set_src_cost'.  That has the potential of
405      eliminating the most insns without additional costs, and it
406      is the same that cse.c used to do.  */
407   if (gain == 0)
408     gain = (set_src_cost (new_rtx, VOIDmode, speed)
409 	    - set_src_cost (old_rtx, VOIDmode, speed));
410 
411   return (gain > 0);
412 }
413 
414 
415 /* Flags for the last parameter of propagate_rtx_1.  */
416 
417 enum {
418   /* If PR_CAN_APPEAR is true, propagate_rtx_1 always returns true;
419      if it is false, propagate_rtx_1 returns false if, for at least
420      one occurrence OLD, it failed to collapse the result to a constant.
421      For example, (mult:M (reg:M A) (minus:M (reg:M B) (reg:M A))) may
422      collapse to zero if replacing (reg:M B) with (reg:M A).
423 
424      PR_CAN_APPEAR is disregarded inside MEMs: in that case,
425      propagate_rtx_1 just tries to make cheaper and valid memory
426      addresses.  */
427   PR_CAN_APPEAR = 1,
428 
429   /* If PR_HANDLE_MEM is not set, propagate_rtx_1 won't attempt any replacement
430      outside memory addresses.  This is needed because propagate_rtx_1 does
431      not do any analysis on memory; thus it is very conservative and in general
432      it will fail if non-read-only MEMs are found in the source expression.
433 
434      PR_HANDLE_MEM is set when the source of the propagation was not
435      another MEM.  Then, it is safe not to treat non-read-only MEMs as
436      ``opaque'' objects.  */
437   PR_HANDLE_MEM = 2,
438 
439   /* Set when costs should be optimized for speed.  */
440   PR_OPTIMIZE_FOR_SPEED = 4
441 };
442 
443 
444 /* Replace all occurrences of OLD in *PX with NEW and try to simplify the
445    resulting expression.  Replace *PX with a new RTL expression if an
446    occurrence of OLD was found.
447 
448    This is only a wrapper around simplify-rtx.c: do not add any pattern
449    matching code here.  (The sole exception is the handling of LO_SUM, but
450    that is because there is no simplify_gen_* function for LO_SUM).  */
451 
452 static bool
propagate_rtx_1(rtx * px,rtx old_rtx,rtx new_rtx,int flags)453 propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags)
454 {
455   rtx x = *px, tem = NULL_RTX, op0, op1, op2;
456   enum rtx_code code = GET_CODE (x);
457   machine_mode mode = GET_MODE (x);
458   machine_mode op_mode;
459   bool can_appear = (flags & PR_CAN_APPEAR) != 0;
460   bool valid_ops = true;
461 
462   if (!(flags & PR_HANDLE_MEM) && MEM_P (x) && !MEM_READONLY_P (x))
463     {
464       /* If unsafe, change MEMs to CLOBBERs or SCRATCHes (to preserve whether
465 	 they have side effects or not).  */
466       *px = (side_effects_p (x)
467 	     ? gen_rtx_CLOBBER (GET_MODE (x), const0_rtx)
468 	     : gen_rtx_SCRATCH (GET_MODE (x)));
469       return false;
470     }
471 
472   /* If X is OLD_RTX, return NEW_RTX.  But not if replacing only within an
473      address, and we are *not* inside one.  */
474   if (x == old_rtx)
475     {
476       *px = new_rtx;
477       return can_appear;
478     }
479 
480   /* If this is an expression, try recursive substitution.  */
481   switch (GET_RTX_CLASS (code))
482     {
483     case RTX_UNARY:
484       op0 = XEXP (x, 0);
485       op_mode = GET_MODE (op0);
486       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
487       if (op0 == XEXP (x, 0))
488 	return true;
489       tem = simplify_gen_unary (code, mode, op0, op_mode);
490       break;
491 
492     case RTX_BIN_ARITH:
493     case RTX_COMM_ARITH:
494       op0 = XEXP (x, 0);
495       op1 = XEXP (x, 1);
496       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
497       valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
498       if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
499 	return true;
500       tem = simplify_gen_binary (code, mode, op0, op1);
501       break;
502 
503     case RTX_COMPARE:
504     case RTX_COMM_COMPARE:
505       op0 = XEXP (x, 0);
506       op1 = XEXP (x, 1);
507       op_mode = GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
508       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
509       valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
510       if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
511 	return true;
512       tem = simplify_gen_relational (code, mode, op_mode, op0, op1);
513       break;
514 
515     case RTX_TERNARY:
516     case RTX_BITFIELD_OPS:
517       op0 = XEXP (x, 0);
518       op1 = XEXP (x, 1);
519       op2 = XEXP (x, 2);
520       op_mode = GET_MODE (op0);
521       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
522       valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
523       valid_ops &= propagate_rtx_1 (&op2, old_rtx, new_rtx, flags);
524       if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1) && op2 == XEXP (x, 2))
525 	return true;
526       if (op_mode == VOIDmode)
527 	op_mode = GET_MODE (op0);
528       tem = simplify_gen_ternary (code, mode, op_mode, op0, op1, op2);
529       break;
530 
531     case RTX_EXTRA:
532       /* The only case we try to handle is a SUBREG.  */
533       if (code == SUBREG)
534 	{
535           op0 = XEXP (x, 0);
536 	  valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
537           if (op0 == XEXP (x, 0))
538 	    return true;
539 	  tem = simplify_gen_subreg (mode, op0, GET_MODE (SUBREG_REG (x)),
540 				     SUBREG_BYTE (x));
541 	}
542       break;
543 
544     case RTX_OBJ:
545       if (code == MEM && x != new_rtx)
546 	{
547 	  rtx new_op0;
548 	  op0 = XEXP (x, 0);
549 
550 	  /* There are some addresses that we cannot work on.  */
551 	  if (!can_simplify_addr (op0))
552 	    return true;
553 
554 	  op0 = new_op0 = targetm.delegitimize_address (op0);
555 	  valid_ops &= propagate_rtx_1 (&new_op0, old_rtx, new_rtx,
556 					flags | PR_CAN_APPEAR);
557 
558 	  /* Dismiss transformation that we do not want to carry on.  */
559 	  if (!valid_ops
560 	      || new_op0 == op0
561 	      || !(GET_MODE (new_op0) == GET_MODE (op0)
562 		   || GET_MODE (new_op0) == VOIDmode))
563 	    return true;
564 
565 	  canonicalize_address (new_op0);
566 
567 	  /* Copy propagations are always ok.  Otherwise check the costs.  */
568 	  if (!(REG_P (old_rtx) && REG_P (new_rtx))
569 	      && !should_replace_address (op0, new_op0, GET_MODE (x),
570 					  MEM_ADDR_SPACE (x),
571 	      			 	  flags & PR_OPTIMIZE_FOR_SPEED))
572 	    return true;
573 
574 	  tem = replace_equiv_address_nv (x, new_op0);
575 	}
576 
577       else if (code == LO_SUM)
578 	{
579           op0 = XEXP (x, 0);
580           op1 = XEXP (x, 1);
581 
582 	  /* The only simplification we do attempts to remove references to op0
583 	     or make it constant -- in both cases, op0's invalidity will not
584 	     make the result invalid.  */
585 	  propagate_rtx_1 (&op0, old_rtx, new_rtx, flags | PR_CAN_APPEAR);
586 	  valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
587           if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
588 	    return true;
589 
590 	  /* (lo_sum (high x) x) -> x  */
591 	  if (GET_CODE (op0) == HIGH && rtx_equal_p (XEXP (op0, 0), op1))
592 	    tem = op1;
593 	  else
594 	    tem = gen_rtx_LO_SUM (mode, op0, op1);
595 
596 	  /* OP1 is likely not a legitimate address, otherwise there would have
597 	     been no LO_SUM.  We want it to disappear if it is invalid, return
598 	     false in that case.  */
599 	  return memory_address_p (mode, tem);
600 	}
601 
602       else if (code == REG)
603 	{
604 	  if (rtx_equal_p (x, old_rtx))
605 	    {
606               *px = new_rtx;
607               return can_appear;
608 	    }
609 	}
610       break;
611 
612     default:
613       break;
614     }
615 
616   /* No change, no trouble.  */
617   if (tem == NULL_RTX)
618     return true;
619 
620   *px = tem;
621 
622   /* The replacement we made so far is valid, if all of the recursive
623      replacements were valid, or we could simplify everything to
624      a constant.  */
625   return valid_ops || can_appear || CONSTANT_P (tem);
626 }
627 
628 
629 /* Return true if X constains a non-constant mem.  */
630 
631 static bool
varying_mem_p(const_rtx x)632 varying_mem_p (const_rtx x)
633 {
634   subrtx_iterator::array_type array;
635   FOR_EACH_SUBRTX (iter, array, x, NONCONST)
636     if (MEM_P (*iter) && !MEM_READONLY_P (*iter))
637       return true;
638   return false;
639 }
640 
641 
642 /* Replace all occurrences of OLD in X with NEW and try to simplify the
643    resulting expression (in mode MODE).  Return a new expression if it is
644    a constant, otherwise X.
645 
646    Simplifications where occurrences of NEW collapse to a constant are always
647    accepted.  All simplifications are accepted if NEW is a pseudo too.
648    Otherwise, we accept simplifications that have a lower or equal cost.  */
649 
650 static rtx
propagate_rtx(rtx x,machine_mode mode,rtx old_rtx,rtx new_rtx,bool speed)651 propagate_rtx (rtx x, machine_mode mode, rtx old_rtx, rtx new_rtx,
652 	       bool speed)
653 {
654   rtx tem;
655   bool collapsed;
656   int flags;
657 
658   if (REG_P (new_rtx) && REGNO (new_rtx) < FIRST_PSEUDO_REGISTER)
659     return NULL_RTX;
660 
661   flags = 0;
662   if (REG_P (new_rtx)
663       || CONSTANT_P (new_rtx)
664       || (GET_CODE (new_rtx) == SUBREG
665 	  && REG_P (SUBREG_REG (new_rtx))
666 	  && (GET_MODE_SIZE (mode)
667 	      <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (new_rtx))))))
668     flags |= PR_CAN_APPEAR;
669   if (!varying_mem_p (new_rtx))
670     flags |= PR_HANDLE_MEM;
671 
672   if (speed)
673     flags |= PR_OPTIMIZE_FOR_SPEED;
674 
675   tem = x;
676   collapsed = propagate_rtx_1 (&tem, old_rtx, copy_rtx (new_rtx), flags);
677   if (tem == x || !collapsed)
678     return NULL_RTX;
679 
680   /* gen_lowpart_common will not be able to process VOIDmode entities other
681      than CONST_INTs.  */
682   if (GET_MODE (tem) == VOIDmode && !CONST_INT_P (tem))
683     return NULL_RTX;
684 
685   if (GET_MODE (tem) == VOIDmode)
686     tem = rtl_hooks.gen_lowpart_no_emit (mode, tem);
687   else
688     gcc_assert (GET_MODE (tem) == mode);
689 
690   return tem;
691 }
692 
693 
694 
695 
696 /* Return true if the register from reference REF is killed
697    between FROM to (but not including) TO.  */
698 
699 static bool
local_ref_killed_between_p(df_ref ref,rtx_insn * from,rtx_insn * to)700 local_ref_killed_between_p (df_ref ref, rtx_insn *from, rtx_insn *to)
701 {
702   rtx_insn *insn;
703 
704   for (insn = from; insn != to; insn = NEXT_INSN (insn))
705     {
706       df_ref def;
707       if (!INSN_P (insn))
708 	continue;
709 
710       FOR_EACH_INSN_DEF (def, insn)
711 	if (DF_REF_REGNO (ref) == DF_REF_REGNO (def))
712 	  return true;
713     }
714   return false;
715 }
716 
717 
718 /* Check if the given DEF is available in INSN.  This would require full
719    computation of available expressions; we check only restricted conditions:
720    - if DEF is the sole definition of its register, go ahead;
721    - in the same basic block, we check for no definitions killing the
722      definition of DEF_INSN;
723    - if USE's basic block has DEF's basic block as the sole predecessor,
724      we check if the definition is killed after DEF_INSN or before
725      TARGET_INSN insn, in their respective basic blocks.  */
726 static bool
use_killed_between(df_ref use,rtx_insn * def_insn,rtx_insn * target_insn)727 use_killed_between (df_ref use, rtx_insn *def_insn, rtx_insn *target_insn)
728 {
729   basic_block def_bb = BLOCK_FOR_INSN (def_insn);
730   basic_block target_bb = BLOCK_FOR_INSN (target_insn);
731   int regno;
732   df_ref def;
733 
734   /* We used to have a def reaching a use that is _before_ the def,
735      with the def not dominating the use even though the use and def
736      are in the same basic block, when a register may be used
737      uninitialized in a loop.  This should not happen anymore since
738      we do not use reaching definitions, but still we test for such
739      cases and assume that DEF is not available.  */
740   if (def_bb == target_bb
741       ? DF_INSN_LUID (def_insn) >= DF_INSN_LUID (target_insn)
742       : !dominated_by_p (CDI_DOMINATORS, target_bb, def_bb))
743     return true;
744 
745   /* Check if the reg in USE has only one definition.  We already
746      know that this definition reaches use, or we wouldn't be here.
747      However, this is invalid for hard registers because if they are
748      live at the beginning of the function it does not mean that we
749      have an uninitialized access.  */
750   regno = DF_REF_REGNO (use);
751   def = DF_REG_DEF_CHAIN (regno);
752   if (def
753       && DF_REF_NEXT_REG (def) == NULL
754       && regno >= FIRST_PSEUDO_REGISTER)
755     return false;
756 
757   /* Check locally if we are in the same basic block.  */
758   if (def_bb == target_bb)
759     return local_ref_killed_between_p (use, def_insn, target_insn);
760 
761   /* Finally, if DEF_BB is the sole predecessor of TARGET_BB.  */
762   if (single_pred_p (target_bb)
763       && single_pred (target_bb) == def_bb)
764     {
765       df_ref x;
766 
767       /* See if USE is killed between DEF_INSN and the last insn in the
768 	 basic block containing DEF_INSN.  */
769       x = df_bb_regno_last_def_find (def_bb, regno);
770       if (x && DF_INSN_LUID (DF_REF_INSN (x)) >= DF_INSN_LUID (def_insn))
771 	return true;
772 
773       /* See if USE is killed between TARGET_INSN and the first insn in the
774 	 basic block containing TARGET_INSN.  */
775       x = df_bb_regno_first_def_find (target_bb, regno);
776       if (x && DF_INSN_LUID (DF_REF_INSN (x)) < DF_INSN_LUID (target_insn))
777 	return true;
778 
779       return false;
780     }
781 
782   /* Otherwise assume the worst case.  */
783   return true;
784 }
785 
786 
787 /* Check if all uses in DEF_INSN can be used in TARGET_INSN.  This
788    would require full computation of available expressions;
789    we check only restricted conditions, see use_killed_between.  */
790 static bool
all_uses_available_at(rtx_insn * def_insn,rtx_insn * target_insn)791 all_uses_available_at (rtx_insn *def_insn, rtx_insn *target_insn)
792 {
793   df_ref use;
794   struct df_insn_info *insn_info = DF_INSN_INFO_GET (def_insn);
795   rtx def_set = single_set (def_insn);
796   rtx_insn *next;
797 
798   gcc_assert (def_set);
799 
800   /* If target_insn comes right after def_insn, which is very common
801      for addresses, we can use a quicker test.  Ignore debug insns
802      other than target insns for this.  */
803   next = NEXT_INSN (def_insn);
804   while (next && next != target_insn && DEBUG_INSN_P (next))
805     next = NEXT_INSN (next);
806   if (next == target_insn && REG_P (SET_DEST (def_set)))
807     {
808       rtx def_reg = SET_DEST (def_set);
809 
810       /* If the insn uses the reg that it defines, the substitution is
811          invalid.  */
812       FOR_EACH_INSN_INFO_USE (use, insn_info)
813 	if (rtx_equal_p (DF_REF_REG (use), def_reg))
814 	  return false;
815       FOR_EACH_INSN_INFO_EQ_USE (use, insn_info)
816 	if (rtx_equal_p (DF_REF_REG (use), def_reg))
817 	  return false;
818     }
819   else
820     {
821       rtx def_reg = REG_P (SET_DEST (def_set)) ? SET_DEST (def_set) : NULL_RTX;
822 
823       /* Look at all the uses of DEF_INSN, and see if they are not
824 	 killed between DEF_INSN and TARGET_INSN.  */
825       FOR_EACH_INSN_INFO_USE (use, insn_info)
826 	{
827 	  if (def_reg && rtx_equal_p (DF_REF_REG (use), def_reg))
828 	    return false;
829 	  if (use_killed_between (use, def_insn, target_insn))
830 	    return false;
831 	}
832       FOR_EACH_INSN_INFO_EQ_USE (use, insn_info)
833 	{
834 	  if (def_reg && rtx_equal_p (DF_REF_REG (use), def_reg))
835 	    return false;
836 	  if (use_killed_between (use, def_insn, target_insn))
837 	    return false;
838 	}
839     }
840 
841   return true;
842 }
843 
844 
845 static df_ref *active_defs;
846 static sparseset active_defs_check;
847 
848 /* Fill the ACTIVE_DEFS array with the use->def link for the registers
849    mentioned in USE_REC.  Register the valid entries in ACTIVE_DEFS_CHECK
850    too, for checking purposes.  */
851 
852 static void
register_active_defs(df_ref use)853 register_active_defs (df_ref use)
854 {
855   for (; use; use = DF_REF_NEXT_LOC (use))
856     {
857       df_ref def = get_def_for_use (use);
858       int regno = DF_REF_REGNO (use);
859 
860       if (flag_checking)
861 	sparseset_set_bit (active_defs_check, regno);
862       active_defs[regno] = def;
863     }
864 }
865 
866 
867 /* Build the use->def links that we use to update the dataflow info
868    for new uses.  Note that building the links is very cheap and if
869    it were done earlier, they could be used to rule out invalid
870    propagations (in addition to what is done in all_uses_available_at).
871    I'm not doing this yet, though.  */
872 
873 static void
update_df_init(rtx_insn * def_insn,rtx_insn * insn)874 update_df_init (rtx_insn *def_insn, rtx_insn *insn)
875 {
876   if (flag_checking)
877     sparseset_clear (active_defs_check);
878   register_active_defs (DF_INSN_USES (def_insn));
879   register_active_defs (DF_INSN_USES (insn));
880   register_active_defs (DF_INSN_EQ_USES (insn));
881 }
882 
883 
884 /* Update the USE_DEF_REF array for the given use, using the active definitions
885    in the ACTIVE_DEFS array to match pseudos to their def. */
886 
887 static inline void
update_uses(df_ref use)888 update_uses (df_ref use)
889 {
890   for (; use; use = DF_REF_NEXT_LOC (use))
891     {
892       int regno = DF_REF_REGNO (use);
893 
894       /* Set up the use-def chain.  */
895       if (DF_REF_ID (use) >= (int) use_def_ref.length ())
896         use_def_ref.safe_grow_cleared (DF_REF_ID (use) + 1);
897 
898       if (flag_checking)
899 	gcc_assert (sparseset_bit_p (active_defs_check, regno));
900       use_def_ref[DF_REF_ID (use)] = active_defs[regno];
901     }
902 }
903 
904 
905 /* Update the USE_DEF_REF array for the uses in INSN.  Only update note
906    uses if NOTES_ONLY is true.  */
907 
908 static void
update_df(rtx_insn * insn,rtx note)909 update_df (rtx_insn *insn, rtx note)
910 {
911   struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
912 
913   if (note)
914     {
915       df_uses_create (&XEXP (note, 0), insn, DF_REF_IN_NOTE);
916       df_notes_rescan (insn);
917     }
918   else
919     {
920       df_uses_create (&PATTERN (insn), insn, 0);
921       df_insn_rescan (insn);
922       update_uses (DF_INSN_INFO_USES (insn_info));
923     }
924 
925   update_uses (DF_INSN_INFO_EQ_USES (insn_info));
926 }
927 
928 
929 /* Try substituting NEW into LOC, which originated from forward propagation
930    of USE's value from DEF_INSN.  SET_REG_EQUAL says whether we are
931    substituting the whole SET_SRC, so we can set a REG_EQUAL note if the
932    new insn is not recognized.  Return whether the substitution was
933    performed.  */
934 
935 static bool
try_fwprop_subst(df_ref use,rtx * loc,rtx new_rtx,rtx_insn * def_insn,bool set_reg_equal)936 try_fwprop_subst (df_ref use, rtx *loc, rtx new_rtx, rtx_insn *def_insn,
937 		  bool set_reg_equal)
938 {
939   rtx_insn *insn = DF_REF_INSN (use);
940   rtx set = single_set (insn);
941   rtx note = NULL_RTX;
942   bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
943   int old_cost = 0;
944   bool ok;
945 
946   update_df_init (def_insn, insn);
947 
948   /* forward_propagate_subreg may be operating on an instruction with
949      multiple sets.  If so, assume the cost of the new instruction is
950      not greater than the old one.  */
951   if (set)
952     old_cost = set_src_cost (SET_SRC (set), GET_MODE (SET_DEST (set)), speed);
953   if (dump_file)
954     {
955       fprintf (dump_file, "\nIn insn %d, replacing\n ", INSN_UID (insn));
956       print_inline_rtx (dump_file, *loc, 2);
957       fprintf (dump_file, "\n with ");
958       print_inline_rtx (dump_file, new_rtx, 2);
959       fprintf (dump_file, "\n");
960     }
961 
962   validate_unshare_change (insn, loc, new_rtx, true);
963   if (!verify_changes (0))
964     {
965       if (dump_file)
966 	fprintf (dump_file, "Changes to insn %d not recognized\n",
967 		 INSN_UID (insn));
968       ok = false;
969     }
970 
971   else if (DF_REF_TYPE (use) == DF_REF_REG_USE
972 	   && set
973 	   && (set_src_cost (SET_SRC (set), GET_MODE (SET_DEST (set)), speed)
974 	       > old_cost))
975     {
976       if (dump_file)
977 	fprintf (dump_file, "Changes to insn %d not profitable\n",
978 		 INSN_UID (insn));
979       ok = false;
980     }
981 
982   else
983     {
984       if (dump_file)
985 	fprintf (dump_file, "Changed insn %d\n", INSN_UID (insn));
986       ok = true;
987     }
988 
989   if (ok)
990     {
991       confirm_change_group ();
992       num_changes++;
993     }
994   else
995     {
996       cancel_changes (0);
997 
998       /* Can also record a simplified value in a REG_EQUAL note,
999 	 making a new one if one does not already exist.  */
1000       if (set_reg_equal)
1001 	{
1002 	  /* If there are any paradoxical SUBREGs, don't add REG_EQUAL note,
1003 	     because the bits in there can be anything and so might not
1004 	     match the REG_EQUAL note content.  See PR70574.  */
1005 	  subrtx_var_iterator::array_type array;
1006 	  FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
1007 	    {
1008 	      rtx x = *iter;
1009 	      if (SUBREG_P (x) && paradoxical_subreg_p (x))
1010 		{
1011 		  set_reg_equal = false;
1012 		  break;
1013 		}
1014 	    }
1015 
1016 	  if (set_reg_equal)
1017 	    {
1018 	      if (dump_file)
1019 		fprintf (dump_file, " Setting REG_EQUAL note\n");
1020 
1021 	      note = set_unique_reg_note (insn, REG_EQUAL, copy_rtx (new_rtx));
1022 	    }
1023 	}
1024     }
1025 
1026   if ((ok || note) && !CONSTANT_P (new_rtx))
1027     update_df (insn, note);
1028 
1029   return ok;
1030 }
1031 
1032 /* For the given single_set INSN, containing SRC known to be a
1033    ZERO_EXTEND or SIGN_EXTEND of a register, return true if INSN
1034    is redundant due to the register being set by a LOAD_EXTEND_OP
1035    load from memory.  */
1036 
1037 static bool
free_load_extend(rtx src,rtx_insn * insn)1038 free_load_extend (rtx src, rtx_insn *insn)
1039 {
1040   rtx reg;
1041   df_ref def, use;
1042 
1043   reg = XEXP (src, 0);
1044 #ifdef LOAD_EXTEND_OP
1045   if (LOAD_EXTEND_OP (GET_MODE (reg)) != GET_CODE (src))
1046 #endif
1047     return false;
1048 
1049   FOR_EACH_INSN_USE (use, insn)
1050     if (!DF_REF_IS_ARTIFICIAL (use)
1051 	&& DF_REF_TYPE (use) == DF_REF_REG_USE
1052 	&& DF_REF_REG (use) == reg)
1053       break;
1054   if (!use)
1055     return false;
1056 
1057   def = get_def_for_use (use);
1058   if (!def)
1059     return false;
1060 
1061   if (DF_REF_IS_ARTIFICIAL (def))
1062     return false;
1063 
1064   if (NONJUMP_INSN_P (DF_REF_INSN (def)))
1065     {
1066       rtx patt = PATTERN (DF_REF_INSN (def));
1067 
1068       if (GET_CODE (patt) == SET
1069 	  && GET_CODE (SET_SRC (patt)) == MEM
1070 	  && rtx_equal_p (SET_DEST (patt), reg))
1071 	return true;
1072     }
1073   return false;
1074 }
1075 
1076 /* If USE is a subreg, see if it can be replaced by a pseudo.  */
1077 
1078 static bool
forward_propagate_subreg(df_ref use,rtx_insn * def_insn,rtx def_set)1079 forward_propagate_subreg (df_ref use, rtx_insn *def_insn, rtx def_set)
1080 {
1081   rtx use_reg = DF_REF_REG (use);
1082   rtx_insn *use_insn;
1083   rtx src;
1084 
1085   /* Only consider subregs... */
1086   machine_mode use_mode = GET_MODE (use_reg);
1087   if (GET_CODE (use_reg) != SUBREG
1088       || !REG_P (SET_DEST (def_set)))
1089     return false;
1090 
1091   /* If this is a paradoxical SUBREG...  */
1092   if (GET_MODE_SIZE (use_mode)
1093       > GET_MODE_SIZE (GET_MODE (SUBREG_REG (use_reg))))
1094     {
1095       /* If this is a paradoxical SUBREG, we have no idea what value the
1096 	 extra bits would have.  However, if the operand is equivalent to
1097 	 a SUBREG whose operand is the same as our mode, and all the modes
1098 	 are within a word, we can just use the inner operand because
1099 	 these SUBREGs just say how to treat the register.  */
1100       use_insn = DF_REF_INSN (use);
1101       src = SET_SRC (def_set);
1102       if (GET_CODE (src) == SUBREG
1103 	  && REG_P (SUBREG_REG (src))
1104 	  && REGNO (SUBREG_REG (src)) >= FIRST_PSEUDO_REGISTER
1105 	  && GET_MODE (SUBREG_REG (src)) == use_mode
1106 	  && subreg_lowpart_p (src)
1107 	  && all_uses_available_at (def_insn, use_insn))
1108 	return try_fwprop_subst (use, DF_REF_LOC (use), SUBREG_REG (src),
1109 				 def_insn, false);
1110     }
1111 
1112   /* If this is a SUBREG of a ZERO_EXTEND or SIGN_EXTEND, and the SUBREG
1113      is the low part of the reg being extended then just use the inner
1114      operand.  Don't do this if the ZERO_EXTEND or SIGN_EXTEND insn will
1115      be removed due to it matching a LOAD_EXTEND_OP load from memory,
1116      or due to the operation being a no-op when applied to registers.
1117      For example, if we have:
1118 
1119 	 A: (set (reg:DI X) (sign_extend:DI (reg:SI Y)))
1120 	 B: (... (subreg:SI (reg:DI X)) ...)
1121 
1122      and mode_rep_extended says that Y is already sign-extended,
1123      the backend will typically allow A to be combined with the
1124      definition of Y or, failing that, allow A to be deleted after
1125      reload through register tying.  Introducing more uses of Y
1126      prevents both optimisations.  */
1127   else if (subreg_lowpart_p (use_reg))
1128     {
1129       use_insn = DF_REF_INSN (use);
1130       src = SET_SRC (def_set);
1131       if ((GET_CODE (src) == ZERO_EXTEND
1132 	   || GET_CODE (src) == SIGN_EXTEND)
1133 	  && REG_P (XEXP (src, 0))
1134 	  && REGNO (XEXP (src, 0)) >= FIRST_PSEUDO_REGISTER
1135 	  && GET_MODE (XEXP (src, 0)) == use_mode
1136 	  && !free_load_extend (src, def_insn)
1137 	  && (targetm.mode_rep_extended (use_mode, GET_MODE (src))
1138 	      != (int) GET_CODE (src))
1139 	  && all_uses_available_at (def_insn, use_insn))
1140 	return try_fwprop_subst (use, DF_REF_LOC (use), XEXP (src, 0),
1141 				 def_insn, false);
1142     }
1143 
1144   return false;
1145 }
1146 
1147 /* Try to replace USE with SRC (defined in DEF_INSN) in __asm.  */
1148 
1149 static bool
forward_propagate_asm(df_ref use,rtx_insn * def_insn,rtx def_set,rtx reg)1150 forward_propagate_asm (df_ref use, rtx_insn *def_insn, rtx def_set, rtx reg)
1151 {
1152   rtx_insn *use_insn = DF_REF_INSN (use);
1153   rtx src, use_pat, asm_operands, new_rtx, *loc;
1154   int speed_p, i;
1155   df_ref uses;
1156 
1157   gcc_assert ((DF_REF_FLAGS (use) & DF_REF_IN_NOTE) == 0);
1158 
1159   src = SET_SRC (def_set);
1160   use_pat = PATTERN (use_insn);
1161 
1162   /* In __asm don't replace if src might need more registers than
1163      reg, as that could increase register pressure on the __asm.  */
1164   uses = DF_INSN_USES (def_insn);
1165   if (uses && DF_REF_NEXT_LOC (uses))
1166     return false;
1167 
1168   update_df_init (def_insn, use_insn);
1169   speed_p = optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn));
1170   asm_operands = NULL_RTX;
1171   switch (GET_CODE (use_pat))
1172     {
1173     case ASM_OPERANDS:
1174       asm_operands = use_pat;
1175       break;
1176     case SET:
1177       if (MEM_P (SET_DEST (use_pat)))
1178 	{
1179 	  loc = &SET_DEST (use_pat);
1180 	  new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
1181 	  if (new_rtx)
1182 	    validate_unshare_change (use_insn, loc, new_rtx, true);
1183 	}
1184       asm_operands = SET_SRC (use_pat);
1185       break;
1186     case PARALLEL:
1187       for (i = 0; i < XVECLEN (use_pat, 0); i++)
1188 	if (GET_CODE (XVECEXP (use_pat, 0, i)) == SET)
1189 	  {
1190 	    if (MEM_P (SET_DEST (XVECEXP (use_pat, 0, i))))
1191 	      {
1192 		loc = &SET_DEST (XVECEXP (use_pat, 0, i));
1193 		new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg,
1194 					 src, speed_p);
1195 		if (new_rtx)
1196 		  validate_unshare_change (use_insn, loc, new_rtx, true);
1197 	      }
1198 	    asm_operands = SET_SRC (XVECEXP (use_pat, 0, i));
1199 	  }
1200 	else if (GET_CODE (XVECEXP (use_pat, 0, i)) == ASM_OPERANDS)
1201 	  asm_operands = XVECEXP (use_pat, 0, i);
1202       break;
1203     default:
1204       gcc_unreachable ();
1205     }
1206 
1207   gcc_assert (asm_operands && GET_CODE (asm_operands) == ASM_OPERANDS);
1208   for (i = 0; i < ASM_OPERANDS_INPUT_LENGTH (asm_operands); i++)
1209     {
1210       loc = &ASM_OPERANDS_INPUT (asm_operands, i);
1211       new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
1212       if (new_rtx)
1213 	validate_unshare_change (use_insn, loc, new_rtx, true);
1214     }
1215 
1216   if (num_changes_pending () == 0 || !apply_change_group ())
1217     return false;
1218 
1219   update_df (use_insn, NULL);
1220   num_changes++;
1221   return true;
1222 }
1223 
1224 /* Try to replace USE with SRC (defined in DEF_INSN) and simplify the
1225    result.  */
1226 
1227 static bool
forward_propagate_and_simplify(df_ref use,rtx_insn * def_insn,rtx def_set)1228 forward_propagate_and_simplify (df_ref use, rtx_insn *def_insn, rtx def_set)
1229 {
1230   rtx_insn *use_insn = DF_REF_INSN (use);
1231   rtx use_set = single_set (use_insn);
1232   rtx src, reg, new_rtx, *loc;
1233   bool set_reg_equal;
1234   machine_mode mode;
1235   int asm_use = -1;
1236 
1237   if (INSN_CODE (use_insn) < 0)
1238     asm_use = asm_noperands (PATTERN (use_insn));
1239 
1240   if (!use_set && asm_use < 0 && !DEBUG_INSN_P (use_insn))
1241     return false;
1242 
1243   /* Do not propagate into PC, CC0, etc.  */
1244   if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode)
1245     return false;
1246 
1247   /* If def and use are subreg, check if they match.  */
1248   reg = DF_REF_REG (use);
1249   if (GET_CODE (reg) == SUBREG && GET_CODE (SET_DEST (def_set)) == SUBREG)
1250     {
1251       if (SUBREG_BYTE (SET_DEST (def_set)) != SUBREG_BYTE (reg))
1252 	return false;
1253     }
1254   /* Check if the def had a subreg, but the use has the whole reg.  */
1255   else if (REG_P (reg) && GET_CODE (SET_DEST (def_set)) == SUBREG)
1256     return false;
1257   /* Check if the use has a subreg, but the def had the whole reg.  Unlike the
1258      previous case, the optimization is possible and often useful indeed.  */
1259   else if (GET_CODE (reg) == SUBREG && REG_P (SET_DEST (def_set)))
1260     reg = SUBREG_REG (reg);
1261 
1262   /* Make sure that we can treat REG as having the same mode as the
1263      source of DEF_SET.  */
1264   if (GET_MODE (SET_DEST (def_set)) != GET_MODE (reg))
1265     return false;
1266 
1267   /* Check if the substitution is valid (last, because it's the most
1268      expensive check!).  */
1269   src = SET_SRC (def_set);
1270   if (!CONSTANT_P (src) && !all_uses_available_at (def_insn, use_insn))
1271     return false;
1272 
1273   /* Check if the def is loading something from the constant pool; in this
1274      case we would undo optimization such as compress_float_constant.
1275      Still, we can set a REG_EQUAL note.  */
1276   if (MEM_P (src) && MEM_READONLY_P (src))
1277     {
1278       rtx x = avoid_constant_pool_reference (src);
1279       if (x != src && use_set)
1280 	{
1281           rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1282 	  rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (use_set);
1283 	  rtx new_rtx = simplify_replace_rtx (old_rtx, src, x);
1284 	  if (old_rtx != new_rtx)
1285             set_unique_reg_note (use_insn, REG_EQUAL, copy_rtx (new_rtx));
1286 	}
1287       return false;
1288     }
1289 
1290   if (asm_use >= 0)
1291     return forward_propagate_asm (use, def_insn, def_set, reg);
1292 
1293   /* Else try simplifying.  */
1294 
1295   if (DF_REF_TYPE (use) == DF_REF_REG_MEM_STORE)
1296     {
1297       loc = &SET_DEST (use_set);
1298       set_reg_equal = false;
1299     }
1300   else if (!use_set)
1301     {
1302       loc = &INSN_VAR_LOCATION_LOC (use_insn);
1303       set_reg_equal = false;
1304     }
1305   else
1306     {
1307       rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1308       if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
1309 	loc = &XEXP (note, 0);
1310       else
1311 	loc = &SET_SRC (use_set);
1312 
1313       /* Do not replace an existing REG_EQUAL note if the insn is not
1314 	 recognized.  Either we're already replacing in the note, or we'll
1315 	 separately try plugging the definition in the note and simplifying.
1316 	 And only install a REQ_EQUAL note when the destination is a REG
1317 	 that isn't mentioned in USE_SET, as the note would be invalid
1318 	 otherwise.  We also don't want to install a note if we are merely
1319 	 propagating a pseudo since verifying that this pseudo isn't dead
1320 	 is a pain; moreover such a note won't help anything.
1321 	 If the use is a paradoxical subreg, make sure we don't add a
1322 	 REG_EQUAL note for it, because it is not equivalent, it is one
1323 	 possible value for it, but we can't rely on it holding that value.
1324 	 See PR70574.  */
1325       set_reg_equal = (note == NULL_RTX
1326 		       && REG_P (SET_DEST (use_set))
1327 		       && !REG_P (src)
1328 		       && !(GET_CODE (src) == SUBREG
1329 			    && REG_P (SUBREG_REG (src)))
1330 		       && !reg_mentioned_p (SET_DEST (use_set),
1331 					    SET_SRC (use_set))
1332 		       && !paradoxical_subreg_p (DF_REF_REG (use)));
1333     }
1334 
1335   if (GET_MODE (*loc) == VOIDmode)
1336     mode = GET_MODE (SET_DEST (use_set));
1337   else
1338     mode = GET_MODE (*loc);
1339 
1340   new_rtx = propagate_rtx (*loc, mode, reg, src,
1341   			   optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn)));
1342 
1343   if (!new_rtx)
1344     return false;
1345 
1346   return try_fwprop_subst (use, loc, new_rtx, def_insn, set_reg_equal);
1347 }
1348 
1349 
1350 /* Given a use USE of an insn, if it has a single reaching
1351    definition, try to forward propagate it into that insn.
1352    Return true if cfg cleanup will be needed.  */
1353 
1354 static bool
forward_propagate_into(df_ref use)1355 forward_propagate_into (df_ref use)
1356 {
1357   df_ref def;
1358   rtx_insn *def_insn, *use_insn;
1359   rtx def_set;
1360   rtx parent;
1361 
1362   if (DF_REF_FLAGS (use) & DF_REF_READ_WRITE)
1363     return false;
1364   if (DF_REF_IS_ARTIFICIAL (use))
1365     return false;
1366 
1367   /* Only consider uses that have a single definition.  */
1368   def = get_def_for_use (use);
1369   if (!def)
1370     return false;
1371   if (DF_REF_FLAGS (def) & DF_REF_READ_WRITE)
1372     return false;
1373   if (DF_REF_IS_ARTIFICIAL (def))
1374     return false;
1375 
1376   /* Do not propagate loop invariant definitions inside the loop.  */
1377   if (DF_REF_BB (def)->loop_father != DF_REF_BB (use)->loop_father)
1378     return false;
1379 
1380   /* Check if the use is still present in the insn!  */
1381   use_insn = DF_REF_INSN (use);
1382   if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
1383     parent = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1384   else
1385     parent = PATTERN (use_insn);
1386 
1387   if (!reg_mentioned_p (DF_REF_REG (use), parent))
1388     return false;
1389 
1390   def_insn = DF_REF_INSN (def);
1391   if (multiple_sets (def_insn))
1392     return false;
1393   def_set = single_set (def_insn);
1394   if (!def_set)
1395     return false;
1396 
1397   /* Only try one kind of propagation.  If two are possible, we'll
1398      do it on the following iterations.  */
1399   if (forward_propagate_and_simplify (use, def_insn, def_set)
1400       || forward_propagate_subreg (use, def_insn, def_set))
1401     {
1402       if (cfun->can_throw_non_call_exceptions
1403 	  && find_reg_note (use_insn, REG_EH_REGION, NULL_RTX)
1404 	  && purge_dead_edges (DF_REF_BB (use)))
1405 	return true;
1406     }
1407   return false;
1408 }
1409 
1410 
1411 static void
fwprop_init(void)1412 fwprop_init (void)
1413 {
1414   num_changes = 0;
1415   calculate_dominance_info (CDI_DOMINATORS);
1416 
1417   /* We do not always want to propagate into loops, so we have to find
1418      loops and be careful about them.  Avoid CFG modifications so that
1419      we don't have to update dominance information afterwards for
1420      build_single_def_use_links.  */
1421   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
1422 
1423   build_single_def_use_links ();
1424   df_set_flags (DF_DEFER_INSN_RESCAN);
1425 
1426   active_defs = XNEWVEC (df_ref, max_reg_num ());
1427   if (flag_checking)
1428     active_defs_check = sparseset_alloc (max_reg_num ());
1429 }
1430 
1431 static void
fwprop_done(void)1432 fwprop_done (void)
1433 {
1434   loop_optimizer_finalize ();
1435 
1436   use_def_ref.release ();
1437   free (active_defs);
1438   if (flag_checking)
1439     sparseset_free (active_defs_check);
1440 
1441   free_dominance_info (CDI_DOMINATORS);
1442   cleanup_cfg (0);
1443   delete_trivially_dead_insns (get_insns (), max_reg_num ());
1444 
1445   if (dump_file)
1446     fprintf (dump_file,
1447 	     "\nNumber of successful forward propagations: %d\n\n",
1448 	     num_changes);
1449 }
1450 
1451 
1452 /* Main entry point.  */
1453 
1454 static bool
gate_fwprop(void)1455 gate_fwprop (void)
1456 {
1457   return optimize > 0 && flag_forward_propagate;
1458 }
1459 
1460 static unsigned int
fwprop(void)1461 fwprop (void)
1462 {
1463   unsigned i;
1464   bool need_cleanup = false;
1465 
1466   fwprop_init ();
1467 
1468   /* Go through all the uses.  df_uses_create will create new ones at the
1469      end, and we'll go through them as well.
1470 
1471      Do not forward propagate addresses into loops until after unrolling.
1472      CSE did so because it was able to fix its own mess, but we are not.  */
1473 
1474   for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
1475     {
1476       df_ref use = DF_USES_GET (i);
1477       if (use)
1478 	if (DF_REF_TYPE (use) == DF_REF_REG_USE
1479 	    || DF_REF_BB (use)->loop_father == NULL
1480 	    /* The outer most loop is not really a loop.  */
1481 	    || loop_outer (DF_REF_BB (use)->loop_father) == NULL)
1482 	  need_cleanup |= forward_propagate_into (use);
1483     }
1484 
1485   fwprop_done ();
1486   if (need_cleanup)
1487     cleanup_cfg (0);
1488   return 0;
1489 }
1490 
1491 namespace {
1492 
1493 const pass_data pass_data_rtl_fwprop =
1494 {
1495   RTL_PASS, /* type */
1496   "fwprop1", /* name */
1497   OPTGROUP_NONE, /* optinfo_flags */
1498   TV_FWPROP, /* tv_id */
1499   0, /* properties_required */
1500   0, /* properties_provided */
1501   0, /* properties_destroyed */
1502   0, /* todo_flags_start */
1503   TODO_df_finish, /* todo_flags_finish */
1504 };
1505 
1506 class pass_rtl_fwprop : public rtl_opt_pass
1507 {
1508 public:
pass_rtl_fwprop(gcc::context * ctxt)1509   pass_rtl_fwprop (gcc::context *ctxt)
1510     : rtl_opt_pass (pass_data_rtl_fwprop, ctxt)
1511   {}
1512 
1513   /* opt_pass methods: */
gate(function *)1514   virtual bool gate (function *) { return gate_fwprop (); }
execute(function *)1515   virtual unsigned int execute (function *) { return fwprop (); }
1516 
1517 }; // class pass_rtl_fwprop
1518 
1519 } // anon namespace
1520 
1521 rtl_opt_pass *
make_pass_rtl_fwprop(gcc::context * ctxt)1522 make_pass_rtl_fwprop (gcc::context *ctxt)
1523 {
1524   return new pass_rtl_fwprop (ctxt);
1525 }
1526 
1527 static unsigned int
fwprop_addr(void)1528 fwprop_addr (void)
1529 {
1530   unsigned i;
1531   bool need_cleanup = false;
1532 
1533   fwprop_init ();
1534 
1535   /* Go through all the uses.  df_uses_create will create new ones at the
1536      end, and we'll go through them as well.  */
1537   for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
1538     {
1539       df_ref use = DF_USES_GET (i);
1540       if (use)
1541 	if (DF_REF_TYPE (use) != DF_REF_REG_USE
1542 	    && DF_REF_BB (use)->loop_father != NULL
1543 	    /* The outer most loop is not really a loop.  */
1544 	    && loop_outer (DF_REF_BB (use)->loop_father) != NULL)
1545 	  need_cleanup |= forward_propagate_into (use);
1546     }
1547 
1548   fwprop_done ();
1549 
1550   if (need_cleanup)
1551     cleanup_cfg (0);
1552   return 0;
1553 }
1554 
1555 namespace {
1556 
1557 const pass_data pass_data_rtl_fwprop_addr =
1558 {
1559   RTL_PASS, /* type */
1560   "fwprop2", /* name */
1561   OPTGROUP_NONE, /* optinfo_flags */
1562   TV_FWPROP, /* tv_id */
1563   0, /* properties_required */
1564   0, /* properties_provided */
1565   0, /* properties_destroyed */
1566   0, /* todo_flags_start */
1567   TODO_df_finish, /* todo_flags_finish */
1568 };
1569 
1570 class pass_rtl_fwprop_addr : public rtl_opt_pass
1571 {
1572 public:
pass_rtl_fwprop_addr(gcc::context * ctxt)1573   pass_rtl_fwprop_addr (gcc::context *ctxt)
1574     : rtl_opt_pass (pass_data_rtl_fwprop_addr, ctxt)
1575   {}
1576 
1577   /* opt_pass methods: */
gate(function *)1578   virtual bool gate (function *) { return gate_fwprop (); }
execute(function *)1579   virtual unsigned int execute (function *) { return fwprop_addr (); }
1580 
1581 }; // class pass_rtl_fwprop_addr
1582 
1583 } // anon namespace
1584 
1585 rtl_opt_pass *
make_pass_rtl_fwprop_addr(gcc::context * ctxt)1586 make_pass_rtl_fwprop_addr (gcc::context *ctxt)
1587 {
1588   return new pass_rtl_fwprop_addr (ctxt);
1589 }
1590