/* RTL dead store elimination.
   Copyright (C) 2005-2020 Free Software Foundation, Inc.

   Contributed by Richard Sandiford <rsandifo@codesourcery.com>
   and Kenneth Zadeck <zadeck@naturalbridge.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#undef BASELINE

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "df.h"
#include "memmodel.h"
#include "tm_p.h"
#include "gimple-ssa.h"
#include "expmed.h"
#include "optabs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "alias.h"
#include "stor-layout.h"
#include "cfgrtl.h"
#include "cselib.h"
#include "tree-pass.h"
#include "explow.h"
#include "expr.h"
#include "dbgcnt.h"
#include "rtl-iter.h"
#include "cfgcleanup.h"
#include "calls.h"

/* This file contains three techniques for performing Dead Store
   Elimination (dse).

   * The first technique performs dse locally on any base address.  It
   is based on cselib, which is a local value numbering technique.
   This technique is local to a basic block but deals with fairly
   general addresses.

   * The second technique performs dse globally but is restricted to
   base addresses that are either constant or are relative to the
   frame_pointer.

   * The third technique (which is only done after register allocation)
   processes the spill slots.  This differs from the second
   technique because it takes advantage of the fact that spilling is
   completely free from the effects of aliasing.

   Logically, dse is a backwards dataflow problem.  A store can be
   deleted if it cannot be reached in the backward direction by any
   use of the value being stored.  However, the local technique uses a
   forwards scan of the basic block because cselib requires that the
   block be processed in that order.

   The pass is logically broken into 7 steps:

   0) Initialization.

   1) The local algorithm, as well as scanning the insns for the two
   global algorithms.

   2) Analysis to see if the global algs are necessary.  In the case
   of stores based on a constant address, there must be at least two
   stores to that address, to make it possible to delete some of the
   stores.  In the case of stores off of the frame or spill related
   stores, only one store to an address is necessary because those
   stores die at the end of the function.

   3) Set up the global dataflow equations based on processing the
   info parsed in the first step.

   4) Solve the dataflow equations.

   5) Delete the insns that the global analysis has indicated are
   unnecessary.

   6) Delete insns that store the same value as a preceding store
   where the earlier store couldn't be eliminated.

   7) Cleanup.

   The first step uses cselib and canon_rtx to build the largest
   expression possible for each address.  It is a forwards pass
   through each basic block.  From the point of view of the global
   technique, this step could examine a block in either direction;
   the forwards ordering is to accommodate cselib.

   We make a simplifying assumption: addresses fall into four broad
   categories:

   1) base has rtx_varies_p == false, offset is constant.
   2) base has rtx_varies_p == false, offset variable.
   3) base has rtx_varies_p == true, offset constant.
   4) base has rtx_varies_p == true, offset variable.

   The local passes are able to process all 4 kinds of addresses.  The
   global pass only handles 1).
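
   For illustration (assumed RTL shapes, not taken from a real dump),
   the four categories might look like:

   1) (plus (symbol_ref "x") (const_int 8))   constant base, constant offset
   2) (plus (symbol_ref "x") (reg 100))       constant base, variable offset
   3) (plus (reg 100) (const_int 8))          varying base, constant offset
   4) (plus (reg 100) (reg 101))              varying base, variable offset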

   The global problem is formulated as follows:

     A store, S1, to address A, where A is not relative to the stack
     frame, can be eliminated if all paths from S1 to the end of the
     function contain another store to A before a read of A.

     If the address A is relative to the stack frame, a store S2 to A
     can be eliminated if there are no paths from S2 that reach the
     end of the function that read A before another store to A.  In
     this case S2 can be deleted if there are paths from S2 to the
     end of the function that have no reads or writes to A.  This
     second case allows stores to the stack frame to be deleted that
     would otherwise die when the function returns.  This cannot be
     done if stores_off_frame_dead_at_return is false; see the
     documentation of that variable for when that happens.

     The global problem is formulated as a backwards set union
     dataflow problem where the stores are the gens and reads are the
     kills.  Set union problems are rare and require some special
     handling given our representation of bitmaps.  A straightforward
     implementation requires a lot of bitmaps filled with 1s.
     These are expensive and cumbersome in our bitmap formulation so
     care has been taken to avoid large vectors filled with 1s.  See
     the comments in bb_info and in the dataflow confluence functions
     for details.
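
     For illustration, consider a store S1 to a frame slot A that is
     followed on every path by another store S2 to A with no
     intervening read.  S2 sets the bit for A's position in its
     block's GEN set, no read sets the corresponding KILL bit, and so
     the bit survives the backwards propagation to the point of S1;
     S1 is then dead and is removed in step 5.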

   There are two places for further enhancements to this algorithm:

   1) The original dse, which was embedded in a pass called flow, also
   did local address forwarding.  For example in

   A <- r100
   ... <- A

   flow would replace the right hand side of the second insn with a
   reference to r100.  Most of the information needed to add this to
   this pass is available, but it has not been done because handling
   the cases where r100 is assigned to between the first and second
   insn, and/or where the second insn is a load of part of the value
   stored by the first insn, is a lot of work.

   insn 5 in gcc.c-torture/compile/990203-1.c simple case.
   insn 15 in gcc.c-torture/execute/20001017-2.c simple case.
   insn 25 in gcc.c-torture/execute/20001026-1.c simple case.
   insn 44 in gcc.c-torture/execute/20010910-1.c simple case.

   2) The cleaning up of spill code is quite profitable.  It currently
   depends on reading tea leaves and chicken entrails left by reload.
   This pass depends on reload creating a singleton alias set for each
   spill slot and telling the next dse pass which of these alias sets
   are the singletons.  Rather than analyze the addresses of the
   spills, dse's spill processing just does analysis of the loads and
   stores that use those alias sets.  There are three cases where this
   falls short:

     a) Reload sometimes creates the slot for one mode of access, and
     then inserts loads and/or stores for a smaller mode.  In this
     case, the current code just punts on the slot.  The proper thing
     to do is to back out and use one bit vector position for each
     byte of the entity associated with the slot.  This depends on
     KNOWING that reload always generates the accesses for each of the
     bytes in some canonical (read that easy to understand several
     passes after reload happens) way.

     b) Reload sometimes decides that the spill slot it allocated was
     not large enough for the mode and goes back and allocates more
     slots with the same mode and alias set.  The backout in this case
     is a little more graceful than (a).  In this case the slot is
     unmarked as being a spill slot and if the final address comes out
     to be based off the frame pointer, the global algorithm handles
     this slot.

     c) For any pass that may prespill, there is currently no
     mechanism to tell the dse pass that the slot being used has the
     special properties that reload uses.  It may be that all that is
     required is to have those passes make the same calls that reload
     does, assuming that the alias sets can be manipulated in the same
     way.  */

/* There are limits to the size of constant offsets we model for the
   global problem.  There are certainly test cases that exceed this
   limit; however, it is unlikely that there are important programs
   that really have constant offsets this large.  */
#define MAX_OFFSET (64 * 1024)

/* Obstack for the DSE dataflow bitmaps.  We don't want to put these
   on the default obstack because these bitmaps can grow quite large
   (~2GB for the small (!) test case of PR54146) and we'll hold on to
   all that memory until the end of the compiler run.
   As a bonus, the end-of-pass cleanup can destroy all the bitmaps by
   just releasing the whole obstack.  */
static bitmap_obstack dse_bitmap_obstack;

/* Obstack for other data.  As for above: Kinda nice to be able to
   throw it all away at the end in one big sweep.  */
static struct obstack dse_obstack;

/* Scratch bitmap for cselib's cselib_expand_value_rtx.  */
static bitmap scratch = NULL;

struct insn_info_type;

/* This structure holds information about a candidate store.  */
class store_info
{
public:

  /* False means this is a clobber.  */
  bool is_set;

  /* False if a single HOST_WIDE_INT bitmap is used for positions_needed.  */
  bool is_large;

  /* The id of the mem group of the base address.  If rtx_varies_p is
     true, this is -1.  Otherwise, it is the index into the group
     table.  */
  int group_id;

  /* This is the cselib value.  */
  cselib_val *cse_base;

  /* The canonized mem.  */
  rtx mem;

  /* Canonized MEM address for use by canon_true_dependence.  */
  rtx mem_addr;

  /* The offset of the first byte associated with the operation.  */
  poly_int64 offset;

  /* The number of bytes covered by the operation.  This is always exact
     and known (rather than -1).  */
  poly_int64 width;

  union
    {
      /* A bitmask as wide as the number of bytes in the word that
	 contains a 1 if the byte may be needed.  The store is unused if
	 all of the bits are 0.  This is used if IS_LARGE is false.  */
      unsigned HOST_WIDE_INT small_bitmask;

      struct
	{
	  /* A bitmap with one bit per byte, or null if the number of
	     bytes isn't known at compile time.  A cleared bit means
	     the position is needed.  Used if IS_LARGE is true.  */
	  bitmap bmap;

	  /* When BITMAP is nonnull, this counts the number of set bits
	     (i.e. unneeded bytes) in the bitmap.  If it is equal to
	     WIDTH, the whole store is unused.

	     When BITMAP is null:
	     - the store is definitely not needed when COUNT == 1
	     - all the store is needed when COUNT == 0 and RHS is nonnull
	     - otherwise we don't know which parts of the store are needed.  */
	  int count;
	} large;
    } positions_needed;

  /* The next store info for this insn.  */
  class store_info *next;

  /* The right hand side of the store.  This is used if there is a
     subsequent reload of the mem's address somewhere later in the
     basic block.  */
  rtx rhs;

  /* If rhs is or holds a constant, this contains that constant,
     otherwise NULL.  */
  rtx const_rhs;

  /* Set if this store stores the same constant value as the insn
     recorded in REDUNDANT_REASON.  Such stores aren't eliminated
     early, because doing that might prevent the earlier larger store
     from being eliminated.  */
  struct insn_info_type *redundant_reason;
};

/* Return a bitmask with the first N low bits set.  */

static unsigned HOST_WIDE_INT
lowpart_bitmask (int n)
{
  unsigned HOST_WIDE_INT mask = HOST_WIDE_INT_M1U;
  return mask >> (HOST_BITS_PER_WIDE_INT - n);
}
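
/* For illustration: lowpart_bitmask (3) == 0x7, and
   lowpart_bitmask (HOST_BITS_PER_WIDE_INT) == HOST_WIDE_INT_M1U.
   N is assumed here to be at least 1; N == 0 would shift by the full
   word width, which is undefined.  */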

static object_allocator<store_info> cse_store_info_pool ("cse_store_info_pool");

static object_allocator<store_info> rtx_store_info_pool ("rtx_store_info_pool");

/* This structure holds information about a load.  These are only
   built for rtx bases.  */
class read_info_type
{
public:
  /* The id of the mem group of the base address.  */
  int group_id;

  /* The offset of the first byte associated with the operation.  */
  poly_int64 offset;

  /* The number of bytes covered by the operation, or -1 if not known.  */
  poly_int64 width;

  /* The mem being read.  */
  rtx mem;

  /* The next read_info for this insn.  */
  class read_info_type *next;
};
typedef class read_info_type *read_info_t;

static object_allocator<read_info_type> read_info_type_pool ("read_info_pool");

/* One of these records is created for each insn.  */

struct insn_info_type
{
  /* Set true if the insn contains a store but the insn itself cannot
     be deleted.  This is set if the insn is a parallel and there is
     more than one non-dead output or if the insn is in some way
     volatile.  */
  bool cannot_delete;

  /* This field is only used by the global algorithm.  It is set true
     if the insn contains any read of mem except for a category (1)
     address.  This is also set if the insn is a call or has a mem
     clobber.  If the insn contains a wild read, the use_rec will be
     null.  */
  bool wild_read;

  /* This is true only for CALL instructions which could potentially read
     any non-frame memory location.  This field is used by the global
     algorithm.  */
  bool non_frame_wild_read;

  /* This field is only used for the processing of const functions.
     These functions cannot read memory, but they can read the stack
     because that is where they may get their parms.  We need to be
     this conservative because, like the store motion pass, we don't
     consider CALL_INSN_FUNCTION_USAGE when processing call insns.
     Moreover, we need to distinguish two cases:
     1. Before reload (register elimination), the stores related to
	outgoing arguments are stack pointer based and thus deemed
	of non-constant base in this pass.  This requires special
	handling but also means that the frame pointer based stores
	need not be killed upon encountering a const function call.
     2. After reload, the stores related to outgoing arguments can be
	either stack pointer or hard frame pointer based.  This means
	that we have no other choice than also killing all the frame
	pointer based stores upon encountering a const function call.
     This field is set after reload for const function calls and before
     reload for const tail function calls on targets where arg pointer
     is the frame pointer.  Having this set is less severe than a wild
     read, it just means that all the frame related stores are killed
     rather than all the stores.  */
  bool frame_read;

  /* This field is only used for the processing of const functions.
     It is set if the insn may contain a stack pointer based store.  */
  bool stack_pointer_based;

  /* This is true if any of the sets within the store contains a
     cselib base.  Such stores can only be deleted by the local
     algorithm.  */
  bool contains_cselib_groups;

  /* The insn.  */
  rtx_insn *insn;

  /* The list of mem sets or mem clobbers that are contained in this
     insn.  If the insn is deletable, it contains only one mem set.
     But it could also contain clobbers.  Insns that contain more than
     one mem set are not deletable, but each of those mems is here in
     order to provide info to delete other insns.  */
  store_info *store_rec;

  /* The linked list of mem uses in this insn.  Only the reads from
     rtx bases are listed here.  The reads to cselib bases are
     completely processed during the first scan and so are never
     created.  */
  read_info_t read_rec;

  /* The live fixed registers.  We assume only fixed registers can
     cause trouble by being clobbered from an expanded pattern;
     storing only the live fixed registers (rather than all registers)
     means less memory needs to be allocated / copied for the individual
     stores.  */
  regset fixed_regs_live;

  /* The prev insn in the basic block.  */
  struct insn_info_type * prev_insn;

  /* The linked list of insns that are in consideration for removal in
     the forwards pass through the basic block.  This pointer may be
     trash as it is not cleared when a wild read occurs.  The only
     time it is guaranteed to be correct is when the traversal starts
     at active_local_stores.  */
  struct insn_info_type * next_local_store;
};
typedef struct insn_info_type *insn_info_t;

static object_allocator<insn_info_type> insn_info_type_pool ("insn_info_pool");

/* The linked list of stores that are under consideration in this
   basic block.  */
static insn_info_t active_local_stores;
static int active_local_stores_len;

struct dse_bb_info_type
{
  /* Pointer to the insn info for the last insn in the block.  These
     are linked so this is how all of the insns are reached.  During
     scanning this is the current insn being scanned.  */
  insn_info_t last_insn;

  /* The info for the global dataflow problem.  */


  /* This is set if the transfer function should AND in the wild_read
     bitmap before applying the kill and gen sets.  That vector knocks
     out most of the bits in the bitmap and thus speeds up the
     operations.  */
  bool apply_wild_read;

  /* The following 4 bitvectors hold information about which positions
     of which stores are live or dead.  They are indexed by
     get_bitmap_index.  */

  /* The set of store positions that exist in this block before a wild read.  */
  bitmap gen;

  /* The set of load positions that exist in this block above the
     same position of a store.  */
  bitmap kill;

  /* The set of stores that reach the top of the block without being
     killed by a read.

     Do not represent the in if it is all ones.  Note that this is
     what the bitvector should logically be initialized to for a set
     intersection problem.  However, like the kill set, this is too
     expensive.  So initially, the in set will only be created for the
     exit block and any block that contains a wild read.  */
  bitmap in;

  /* The set of stores that reach the bottom of the block from its
     successors.

     Do not represent the out if it is all ones.  Note that this is
     what the bitvector should logically be initialized to for a set
     intersection problem.  However, like the kill and in sets, this is
     too expensive.  So what is done is that the confluence operator
     just initializes the vector from one of the out sets of the
     successors of the block.  */
  bitmap out;

  /* The following bitvector is indexed by the reg number.  It
     contains the set of regs that are live at the current instruction
     being processed.  While it contains info for all of the
     registers, only the hard registers are actually examined.  It is
     used to ensure that shift and/or add sequences that are inserted
     do not accidentally clobber live hard regs.  */
  bitmap regs_live;
};

typedef struct dse_bb_info_type *bb_info_t;

static object_allocator<dse_bb_info_type> dse_bb_info_type_pool
  ("bb_info_pool");

/* Table to hold all bb_infos.  */
static bb_info_t *bb_table;

/* There is a group_info for each rtx base that is used to reference
   memory.  There are not many rtx bases because they are very
   limited in scope.  */

struct group_info
{
  /* The actual base of the address.  */
  rtx rtx_base;

  /* The sequential id of the base.  This allows us to have a
     canonical ordering of these that is not based on addresses.  */
  int id;

  /* True if there are any positions that are to be processed
     globally.  */
  bool process_globally;

  /* True if the base of this group is either the frame_pointer or
     hard_frame_pointer.  */
  bool frame_related;

  /* A mem wrapped around the base pointer for the group in order to do
     read dependency.  It must be given BLKmode in order to encompass all
     the possible offsets from the base.  */
  rtx base_mem;

  /* Canonized version of base_mem's address.  */
  rtx canon_base_addr;

  /* These two sets of two bitmaps are used to keep track of how many
     stores are actually referencing that position from this base.  We
     only do this for rtx bases as this will be used to assign
     positions in the bitmaps for the global problem.  Bit N is set in
     store1 on the first store for offset N.  Bit N is set in store2
     for the second store to offset N.  This is all we need since we
     only care about offsets that have two or more stores for them.

     The "_n" suffix is for offsets less than 0 and the "_p" suffix is
     for 0 and greater offsets.

     There is one special case here, for stores into the stack frame:
     we will OR store1 into store2 before deciding which stores to
     consider globally.  This is because stores to the stack frame
     that have no other reads before the end of the function can also
     be deleted.  */
  bitmap store1_n, store1_p, store2_n, store2_p;

  /* These bitmaps keep track of which offsets in this group escape
     this function.  An offset escapes if it corresponds to a named
     variable whose addressable flag is set.  */
  bitmap escaped_n, escaped_p;

  /* The positions in this bitmap have the same assignments as the in,
     out, gen and kill bitmaps.  This bitmap is all zeros except for
     the positions that are occupied by stores for this group.  */
  bitmap group_kill;

  /* The offset_map is used to map the offsets from this base into
     positions in the global bitmaps.  It is only created after all of
     the stores have been scanned and we know which ones we care
     about.  */
  int *offset_map_n, *offset_map_p;
  int offset_map_size_n, offset_map_size_p;
};

static object_allocator<group_info> group_info_pool ("rtx_group_info_pool");

/* Index into the rtx_group_vec.  */
static int rtx_group_next_id;


static vec<group_info *> rtx_group_vec;


/* This structure holds the set of changes that are being deferred
   when removing a read operation.  See replace_read.  */
struct deferred_change
{

  /* The mem that is being replaced.  */
  rtx *loc;

  /* The reg it is being replaced with.  */
  rtx reg;

  struct deferred_change *next;
};

static object_allocator<deferred_change> deferred_change_pool
  ("deferred_change_pool");

static deferred_change *deferred_change_list = NULL;

/* This is true except if cfun->stdarg -- i.e. we cannot do
   this for vararg functions because they play games with the frame.  */
static bool stores_off_frame_dead_at_return;

/* Counters for stats.  */
static int globally_deleted;
static int locally_deleted;

static bitmap all_blocks;

/* Locations that are killed by calls in the global phase.  */
static bitmap kill_on_calls;

/* The number of bits used in the global bitmaps.  */
static unsigned int current_position;

/* Print offset range [OFFSET, OFFSET + WIDTH) to FILE.  */

static void
print_range (FILE *file, poly_int64 offset, poly_int64 width)
{
  fprintf (file, "[");
  print_dec (offset, file, SIGNED);
  fprintf (file, "..");
  print_dec (offset + width, file, SIGNED);
  fprintf (file, ")");
}
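
/* For illustration: print_range (file, 8, 4) prints "[8..12)".  */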

/*----------------------------------------------------------------------------
   Zeroth step.

   Initialization.
----------------------------------------------------------------------------*/


/* Hashtable callbacks for maintaining the "bases" field of
   store_group_info, given that the addresses are function invariants.  */

struct invariant_group_base_hasher : nofree_ptr_hash <group_info>
{
  static inline hashval_t hash (const group_info *);
  static inline bool equal (const group_info *, const group_info *);
};

inline bool
invariant_group_base_hasher::equal (const group_info *gi1,
				    const group_info *gi2)
{
  return rtx_equal_p (gi1->rtx_base, gi2->rtx_base);
}

inline hashval_t
invariant_group_base_hasher::hash (const group_info *gi)
{
  int do_not_record;
  return hash_rtx (gi->rtx_base, Pmode, &do_not_record, NULL, false);
}

/* Tables of group_info structures, hashed by base value.  */
static hash_table<invariant_group_base_hasher> *rtx_group_table;


/* Get the GROUP for BASE.  Add a new group if it is not there.  */

static group_info *
get_group_info (rtx base)
{
  struct group_info tmp_gi;
  group_info *gi;
  group_info **slot;

  gcc_assert (base != NULL_RTX);

  /* Find the store_base_info structure for BASE, creating a new one
     if necessary.  */
  tmp_gi.rtx_base = base;
  slot = rtx_group_table->find_slot (&tmp_gi, INSERT);
  gi = *slot;

  if (gi == NULL)
    {
      *slot = gi = group_info_pool.allocate ();
      gi->rtx_base = base;
      gi->id = rtx_group_next_id++;
      gi->base_mem = gen_rtx_MEM (BLKmode, base);
      gi->canon_base_addr = canon_rtx (base);
      gi->store1_n = BITMAP_ALLOC (&dse_bitmap_obstack);
      gi->store1_p = BITMAP_ALLOC (&dse_bitmap_obstack);
      gi->store2_n = BITMAP_ALLOC (&dse_bitmap_obstack);
      gi->store2_p = BITMAP_ALLOC (&dse_bitmap_obstack);
      gi->escaped_p = BITMAP_ALLOC (&dse_bitmap_obstack);
      gi->escaped_n = BITMAP_ALLOC (&dse_bitmap_obstack);
      gi->group_kill = BITMAP_ALLOC (&dse_bitmap_obstack);
      gi->process_globally = false;
      gi->frame_related =
	(base == frame_pointer_rtx) || (base == hard_frame_pointer_rtx);
      gi->offset_map_size_n = 0;
      gi->offset_map_size_p = 0;
      gi->offset_map_n = NULL;
      gi->offset_map_p = NULL;
      rtx_group_vec.safe_push (gi);
    }

  return gi;
}


/* Initialization of data structures.  */

static void
dse_step0 (void)
{
  locally_deleted = 0;
  globally_deleted = 0;

  bitmap_obstack_initialize (&dse_bitmap_obstack);
  gcc_obstack_init (&dse_obstack);

  scratch = BITMAP_ALLOC (&reg_obstack);
  kill_on_calls = BITMAP_ALLOC (&dse_bitmap_obstack);


  rtx_group_table = new hash_table<invariant_group_base_hasher> (11);

  bb_table = XNEWVEC (bb_info_t, last_basic_block_for_fn (cfun));
  rtx_group_next_id = 0;

  stores_off_frame_dead_at_return = !cfun->stdarg;

  init_alias_analysis ();
}



/*----------------------------------------------------------------------------
   First step.

   Scan all of the insns.  Any random ordering of the blocks is fine.
   Each block is scanned in forward order to accommodate cselib which
   is used to remove stores with non-constant bases.
----------------------------------------------------------------------------*/

/* Delete all of the store_info recs from INSN_INFO.  */

static void
free_store_info (insn_info_t insn_info)
{
  store_info *cur = insn_info->store_rec;
  while (cur)
    {
      store_info *next = cur->next;
      if (cur->is_large)
	BITMAP_FREE (cur->positions_needed.large.bmap);
      if (cur->cse_base)
	cse_store_info_pool.remove (cur);
      else
	rtx_store_info_pool.remove (cur);
      cur = next;
    }

  insn_info->cannot_delete = true;
  insn_info->contains_cselib_groups = false;
  insn_info->store_rec = NULL;
}

struct note_add_store_info
{
  rtx_insn *first, *current;
  regset fixed_regs_live;
  bool failure;
};

/* Callback for emit_inc_dec_insn_before via note_stores.
   Check if a register is clobbered which is live afterwards.  */

static void
note_add_store (rtx loc, const_rtx expr ATTRIBUTE_UNUSED, void *data)
{
  rtx_insn *insn;
  note_add_store_info *info = (note_add_store_info *) data;

  if (!REG_P (loc))
    return;

  /* If this register is referenced by the current or an earlier insn,
     that's OK.  E.g. this applies to the register that is being incremented
     with this addition.  */
  for (insn = info->first;
       insn != NEXT_INSN (info->current);
       insn = NEXT_INSN (insn))
    if (reg_referenced_p (loc, PATTERN (insn)))
      return;

  /* If we come here, we have a clobber of a register that's only OK
     if that register is not live.  If we don't have liveness information
     available, fail now.  */
  if (!info->fixed_regs_live)
    {
      info->failure = true;
      return;
    }
  /* Now check if this is a live fixed register.  */
  unsigned int end_regno = END_REGNO (loc);
  for (unsigned int regno = REGNO (loc); regno < end_regno; ++regno)
    if (REGNO_REG_SET_P (info->fixed_regs_live, regno))
      info->failure = true;
}

/* Callback for for_each_inc_dec that emits an INSN that sets DEST to
   SRC + SRCOFF before insn ARG.  */

static int
emit_inc_dec_insn_before (rtx mem ATTRIBUTE_UNUSED,
			  rtx op ATTRIBUTE_UNUSED,
			  rtx dest, rtx src, rtx srcoff, void *arg)
{
  insn_info_t insn_info = (insn_info_t) arg;
  rtx_insn *insn = insn_info->insn, *new_insn, *cur;
  note_add_store_info info;

  /* We can reuse all operands without copying, because we are about
     to delete the insn that contained it.  */
  if (srcoff)
    {
      start_sequence ();
      emit_insn (gen_add3_insn (dest, src, srcoff));
      new_insn = get_insns ();
      end_sequence ();
    }
  else
    new_insn = gen_move_insn (dest, src);
  info.first = new_insn;
  info.fixed_regs_live = insn_info->fixed_regs_live;
  info.failure = false;
  for (cur = new_insn; cur; cur = NEXT_INSN (cur))
    {
      info.current = cur;
      note_stores (cur, note_add_store, &info);
    }

  /* If a failure was flagged above, return 1 so that for_each_inc_dec will
     return it immediately, communicating the failure to its caller.  */
  if (info.failure)
    return 1;

  emit_insn_before (new_insn, insn);

  return 0;
}
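
/* For illustration (an assumed example): for an insn containing
   (mem (post_inc (reg r1))), for_each_inc_dec presents this callback
   with DEST = r1, SRC = r1 and SRCOFF = (const_int size), so the side
   effect is rematerialized as a separate r1 = r1 + size insn emitted
   just before the insn that is about to be deleted.  */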

/* Before we delete INSN_INFO->INSN, make sure that the auto inc/dec, if it
   is there, is split into a separate insn.
   Return true on success (or if there was nothing to do), false on failure.  */

static bool
check_for_inc_dec_1 (insn_info_t insn_info)
{
  rtx_insn *insn = insn_info->insn;
  rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
  if (note)
    return for_each_inc_dec (PATTERN (insn), emit_inc_dec_insn_before,
			     insn_info) == 0;

  /* Punt on stack pushes, those don't have REG_INC notes and we are
     unprepared to deal with distribution of REG_ARGS_SIZE notes etc.  */
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
    {
      const_rtx x = *iter;
      if (GET_RTX_CLASS (GET_CODE (x)) == RTX_AUTOINC)
	return false;
    }

  return true;
}


/* Entry point for postreload.  If you work on reload_cse, or you need this
   anywhere else, consider if you can provide register liveness information
   and add a parameter to this function so that it can be passed down in
   insn_info.fixed_regs_live.  */
bool
check_for_inc_dec (rtx_insn *insn)
{
  insn_info_type insn_info;
  rtx note;

  insn_info.insn = insn;
  insn_info.fixed_regs_live = NULL;
  note = find_reg_note (insn, REG_INC, NULL_RTX);
  if (note)
    return for_each_inc_dec (PATTERN (insn), emit_inc_dec_insn_before,
			     &insn_info) == 0;

  /* Punt on stack pushes, those don't have REG_INC notes and we are
     unprepared to deal with distribution of REG_ARGS_SIZE notes etc.  */
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
    {
      const_rtx x = *iter;
      if (GET_RTX_CLASS (GET_CODE (x)) == RTX_AUTOINC)
	return false;
    }

  return true;
}

/* Delete the insn and free all of the fields inside INSN_INFO.  */

static void
delete_dead_store_insn (insn_info_t insn_info)
{
  read_info_t read_info;

  if (!dbg_cnt (dse))
    return;

  if (!check_for_inc_dec_1 (insn_info))
    return;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Locally deleting insn %d\n",
	     INSN_UID (insn_info->insn));

  free_store_info (insn_info);
  read_info = insn_info->read_rec;

  while (read_info)
    {
      read_info_t next = read_info->next;
      read_info_type_pool.remove (read_info);
      read_info = next;
    }
  insn_info->read_rec = NULL;

  delete_insn (insn_info->insn);
  locally_deleted++;
  insn_info->insn = NULL;

  insn_info->wild_read = false;
}

/* Return whether DECL, a local variable, can possibly escape the current
   function scope.  */

static bool
local_variable_can_escape (tree decl)
{
  if (TREE_ADDRESSABLE (decl))
    return true;

  /* If this is a partitioned variable, we need to consider all the variables
     in the partition.  This is necessary because a store into one of them can
     be replaced with a store into another and this may not change the outcome
     of the escape analysis.  */
  if (cfun->gimple_df->decls_to_pointers != NULL)
    {
      tree *namep = cfun->gimple_df->decls_to_pointers->get (decl);
      if (namep)
	return TREE_ADDRESSABLE (*namep);
    }

  return false;
}

/* Return whether EXPR can possibly escape the current function scope.  */

static bool
can_escape (tree expr)
{
  tree base;
  if (!expr)
    return true;
  base = get_base_address (expr);
  if (DECL_P (base)
      && !may_be_aliased (base)
      && !(VAR_P (base)
	   && !DECL_EXTERNAL (base)
	   && !TREE_STATIC (base)
	   && local_variable_can_escape (base)))
    return false;
  return true;
}
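
/* For illustration: a local variable whose address is never taken is
   not may_be_aliased and cannot escape, so can_escape returns false
   for it; a mem with no MEM_EXPR conservatively counts as escaping.  */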

/* Set the store* bitmaps and the offset_map_size* fields in GROUP
   based on OFFSET and WIDTH.  */

static void
set_usage_bits (group_info *group, poly_int64 offset, poly_int64 width,
		tree expr)
{
  /* Non-constant offsets and widths act as global kills, so there's no point
     trying to use them to derive global DSE candidates.  */
  HOST_WIDE_INT i, const_offset, const_width;
  bool expr_escapes = can_escape (expr);
  if (offset.is_constant (&const_offset)
      && width.is_constant (&const_width)
      && const_offset > -MAX_OFFSET
      && const_offset + const_width < MAX_OFFSET)
    for (i = const_offset; i < const_offset + const_width; ++i)
      {
	bitmap store1;
	bitmap store2;
	bitmap escaped;
	int ai;
	if (i < 0)
	  {
	    store1 = group->store1_n;
	    store2 = group->store2_n;
	    escaped = group->escaped_n;
	    ai = -i;
	  }
	else
	  {
	    store1 = group->store1_p;
	    store2 = group->store2_p;
	    escaped = group->escaped_p;
	    ai = i;
	  }

	if (!bitmap_set_bit (store1, ai))
	  bitmap_set_bit (store2, ai);
	else
	  {
	    if (i < 0)
	      {
		if (group->offset_map_size_n < ai)
		  group->offset_map_size_n = ai;
	      }
	    else
	      {
		if (group->offset_map_size_p < ai)
		  group->offset_map_size_p = ai;
	      }
	  }
	if (expr_escapes)
	  bitmap_set_bit (escaped, ai);
      }
}
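
/* For illustration: the first store to bytes [4..8) of a group sets
   bits 4..7 of store1_p; a second store touching byte 4 finds the bit
   already set in store1_p, so bit 4 of store2_p is set instead,
   recording that the offset has two or more stores (the case the
   global algorithm cares about for non-frame groups).  */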

static void
reset_active_stores (void)
{
  active_local_stores = NULL;
  active_local_stores_len = 0;
}

/* Free all READ_REC of the LAST_INSN of BB_INFO.  */

static void
free_read_records (bb_info_t bb_info)
{
  insn_info_t insn_info = bb_info->last_insn;
  read_info_t *ptr = &insn_info->read_rec;
  while (*ptr)
    {
      read_info_t next = (*ptr)->next;
      read_info_type_pool.remove (*ptr);
      *ptr = next;
    }
}

/* Set the BB_INFO so that the last insn is marked as a wild read.  */

static void
add_wild_read (bb_info_t bb_info)
{
  insn_info_t insn_info = bb_info->last_insn;
  insn_info->wild_read = true;
  free_read_records (bb_info);
  reset_active_stores ();
}

/* Set the BB_INFO so that the last insn is marked as a wild read of
   non-frame locations.  */

static void
add_non_frame_wild_read (bb_info_t bb_info)
{
  insn_info_t insn_info = bb_info->last_insn;
  insn_info->non_frame_wild_read = true;
  free_read_records (bb_info);
  reset_active_stores ();
}

/* Return true if X is a constant or one of the registers that behave
   as a constant over the life of a function.  This is equivalent to
   !rtx_varies_p for memory addresses.  */

static bool
const_or_frame_p (rtx x)
{
  if (CONSTANT_P (x))
    return true;

  if (GET_CODE (x) == REG)
    {
      /* Note that we have to test for the actual rtx used for the frame
	 and arg pointers and not just the register number in case we have
	 eliminated the frame and/or arg pointer and are using it
	 for pseudos.  */
      if (x == frame_pointer_rtx || x == hard_frame_pointer_rtx
	  /* The arg pointer varies if it is not a fixed register.  */
	  || (x == arg_pointer_rtx && fixed_regs[ARG_POINTER_REGNUM])
	  || x == pic_offset_table_rtx)
	return true;
      return false;
    }

  return false;
}
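
/* For illustration: (symbol_ref "x") satisfies CONSTANT_P and is
   accepted, as are frame_pointer_rtx and hard_frame_pointer_rtx; an
   ordinary pseudo such as (reg 100) can vary and is rejected.  */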

/* Take all reasonable action to put the address of MEM into a form
   on which we can do analysis.

   The gold standard is to get the address into the form: address +
   OFFSET where address is something that rtx_varies_p considers a
   constant.  When we can get the address in this form, we can do
   global analysis on it.  Note that for constant bases, address is
   not actually returned, only the group_id.  The address can be
   obtained from that.

   If that fails, we try cselib to get a value we can at least use
   locally.  If that fails we return false.

   The GROUP_ID is set to -1 for cselib bases and the index of the
   group for non_varying bases.  */

static bool
canon_address (rtx mem,
	       int *group_id,
	       poly_int64 *offset,
	       cselib_val **base)
{
  machine_mode address_mode = get_address_mode (mem);
  rtx mem_address = XEXP (mem, 0);
  rtx expanded_address, address;
  int expanded;

  cselib_lookup (mem_address, address_mode, 1, GET_MODE (mem));

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  mem: ");
      print_inline_rtx (dump_file, mem_address, 0);
      fprintf (dump_file, "\n");
    }

  /* First see if just canon_rtx (mem_address) is const or frame,
     if not, try cselib_expand_value_rtx and call canon_rtx on that.  */
  address = NULL_RTX;
  for (expanded = 0; expanded < 2; expanded++)
    {
      if (expanded)
	{
	  /* Use cselib to replace all of the reg references with the full
	     expression.  This will take care of the case where we have

	     r_x = base + offset;
	     val = *r_x;

	     by making it into

	     val = *(base + offset);  */

	  expanded_address = cselib_expand_value_rtx (mem_address,
						      scratch, 5);

	  /* If this fails, just go with the address from first
	     iteration.  */
	  if (!expanded_address)
	    break;
	}
      else
	expanded_address = mem_address;

      /* Split the address into canonical BASE + OFFSET terms.  */
      address = canon_rtx (expanded_address);

      *offset = 0;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  if (expanded)
	    {
	      fprintf (dump_file, "\n   after cselib_expand address: ");
	      print_inline_rtx (dump_file, expanded_address, 0);
	      fprintf (dump_file, "\n");
	    }

	  fprintf (dump_file, "\n   after canon_rtx address: ");
	  print_inline_rtx (dump_file, address, 0);
	  fprintf (dump_file, "\n");
	}

      if (GET_CODE (address) == CONST)
	address = XEXP (address, 0);

      address = strip_offset_and_add (address, offset);

      if (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (mem))
	  && const_or_frame_p (address))
	{
	  group_info *group = get_group_info (address);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "  gid=%d offset=", group->id);
	      print_dec (*offset, dump_file);
	      fprintf (dump_file, "\n");
	    }
	  *base = NULL;
	  *group_id = group->id;
	  return true;
	}
    }

  *base = cselib_lookup (address, address_mode, true, GET_MODE (mem));
  *group_id = -1;

  if (*base == NULL)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " no cselib val - should be a wild read.\n");
      return false;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  varying cselib base=%u:%u offset = ",
	       (*base)->uid, (*base)->hash);
      print_dec (*offset, dump_file);
      fprintf (dump_file, "\n");
    }
  return true;
}
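
/* For illustration (an assumed example): for
   (mem (plus (reg/f frame_pointer) (const_int -4))), the loop above
   strips the constant to leave the frame pointer as the base;
   const_or_frame_p holds, so the mem gets the frame pointer group's
   id with *OFFSET == -4.  A varying base such as (reg 100) instead
   falls through to the cselib_lookup call and yields
   *GROUP_ID == -1.  */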


/* Clear the rhs field from the active_local_stores array.  */

static void
clear_rhs_from_active_local_stores (void)
{
  insn_info_t ptr = active_local_stores;

  while (ptr)
    {
      store_info *store_info = ptr->store_rec;
      /* Skip the clobbers.  */
      while (!store_info->is_set)
	store_info = store_info->next;

      store_info->rhs = NULL;
      store_info->const_rhs = NULL;

      ptr = ptr->next_local_store;
    }
}


/* Mark the byte at offset POS from the beginning of store S_INFO as
   unneeded.  */

static inline void
set_position_unneeded (store_info *s_info, int pos)
{
  if (__builtin_expect (s_info->is_large, false))
    {
      if (bitmap_set_bit (s_info->positions_needed.large.bmap, pos))
	s_info->positions_needed.large.count++;
    }
  else
    s_info->positions_needed.small_bitmask
      &= ~(HOST_WIDE_INT_1U << pos);
}
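
/* For illustration: for a 4-byte store tracked in the small bitmask
   as 0xf, set_position_unneeded (s_info, 1) clears bit 1, leaving
   0xd; in the IS_LARGE case the same call would instead set bit 1 of
   the large bitmap and bump the count of unneeded bytes.  */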

/* Mark the whole store S_INFO as unneeded.  */

static inline void
set_all_positions_unneeded (store_info *s_info)
{
  if (__builtin_expect (s_info->is_large, false))
    {
      HOST_WIDE_INT width;
      if (s_info->width.is_constant (&width))
	{
	  bitmap_set_range (s_info->positions_needed.large.bmap, 0, width);
	  s_info->positions_needed.large.count = width;
	}
      else
	{
	  gcc_checking_assert (!s_info->positions_needed.large.bmap);
	  s_info->positions_needed.large.count = 1;
	}
    }
  else
    s_info->positions_needed.small_bitmask = HOST_WIDE_INT_0U;
}

/* Return TRUE if any bytes from S_INFO store are needed.  */

static inline bool
any_positions_needed_p (store_info *s_info)
{
  if (__builtin_expect (s_info->is_large, false))
    {
      HOST_WIDE_INT width;
      if (s_info->width.is_constant (&width))
	{
	  gcc_checking_assert (s_info->positions_needed.large.bmap);
	  return s_info->positions_needed.large.count < width;
	}
      else
	{
	  gcc_checking_assert (!s_info->positions_needed.large.bmap);
	  return s_info->positions_needed.large.count == 0;
	}
    }
  else
    return (s_info->positions_needed.small_bitmask != HOST_WIDE_INT_0U);
}

/* Return TRUE if all bytes START through START+WIDTH-1 from S_INFO
   store are known to be needed.  */

static inline bool
all_positions_needed_p (store_info *s_info, poly_int64 start,
			poly_int64 width)
{
  gcc_assert (s_info->rhs);
  if (!s_info->width.is_constant ())
    {
      gcc_assert (s_info->is_large
		  && !s_info->positions_needed.large.bmap);
      return s_info->positions_needed.large.count == 0;
    }

  /* Otherwise, if START and WIDTH are non-constant, we're asking about
     a non-constant region of a constant-sized store.  We can't say for
     sure that all positions are needed.  */
  HOST_WIDE_INT const_start, const_width;
  if (!start.is_constant (&const_start)
      || !width.is_constant (&const_width))
    return false;

  if (__builtin_expect (s_info->is_large, false))
    {
      for (HOST_WIDE_INT i = const_start; i < const_start + const_width; ++i)
	if (bitmap_bit_p (s_info->positions_needed.large.bmap, i))
	  return false;
      return true;
    }
  else
    {
      unsigned HOST_WIDE_INT mask
	= lowpart_bitmask (const_width) << const_start;
      return (s_info->positions_needed.small_bitmask & mask) == mask;
    }
}
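
/* For illustration: for a constant-width store whose small bitmask is
   0xff, a query with START == 1 and WIDTH == 2 builds the mask
   lowpart_bitmask (2) << 1 == 0x6, which is fully covered; if byte 2
   had been marked unneeded (bitmask 0xfb), 0xfb & 0x6 == 0x2 != 0x6
   and the function returns false.  */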


static rtx get_stored_val (store_info *, machine_mode, poly_int64,
			   poly_int64, basic_block, bool);


/* BODY is an instruction pattern that belongs to INSN.  Return 1 if
   there is a candidate store, after adding it to the appropriate
   local store group if so.  */

static int
record_store (rtx body, bb_info_t bb_info)
{
  rtx mem, rhs, const_rhs, mem_addr;
  poly_int64 offset = 0;
  poly_int64 width = 0;
  insn_info_t insn_info = bb_info->last_insn;
  store_info *store_info = NULL;
  int group_id;
  cselib_val *base = NULL;
  insn_info_t ptr, last, redundant_reason;
  bool store_is_unused;

  if (GET_CODE (body) != SET && GET_CODE (body) != CLOBBER)
    return 0;

  mem = SET_DEST (body);

  /* If this is not used, then this cannot be used to keep the insn
     from being deleted.  On the other hand, it does provide something
     that can be used to prove that another store is dead.  */
  store_is_unused
    = (find_reg_note (insn_info->insn, REG_UNUSED, mem) != NULL);

  /* Check whether that value is a suitable memory location.  */
  if (!MEM_P (mem))
    {
      /* If the set or clobber is unused, then it does not affect our
	 ability to get rid of the entire insn.  */
      if (!store_is_unused)
	insn_info->cannot_delete = true;
      return 0;
    }

  /* At this point we know mem is a mem.  */
  if (GET_MODE (mem) == BLKmode)
    {
      HOST_WIDE_INT const_size;
      if (GET_CODE (XEXP (mem, 0)) == SCRATCH)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " adding wild read for (clobber (mem:BLK (scratch)))\n");
	  add_wild_read (bb_info);
	  insn_info->cannot_delete = true;
	  return 0;
	}
      /* Handle (set (mem:BLK (addr) [... S36 ...]) (const_int 0))
	 as memset (addr, 0, 36);  */
      else if (!MEM_SIZE_KNOWN_P (mem)
	       || maybe_le (MEM_SIZE (mem), 0)
	       /* This is a limit on the bitmap size, which is only relevant
		  for constant-sized MEMs.  */
	       || (MEM_SIZE (mem).is_constant (&const_size)
		   && const_size > MAX_OFFSET)
	       || GET_CODE (body) != SET
	       || !CONST_INT_P (SET_SRC (body)))
	{
	  if (!store_is_unused)
	    {
	      /* If the set or clobber is unused, then it does not affect our
		 ability to get rid of the entire insn.  */
	      insn_info->cannot_delete = true;
	      clear_rhs_from_active_local_stores ();
	    }
	  return 0;
	}
    }

  /* We can still process a volatile mem, we just cannot delete it.  */
  if (MEM_VOLATILE_P (mem))
    insn_info->cannot_delete = true;

  if (!canon_address (mem, &group_id, &offset, &base))
    {
      clear_rhs_from_active_local_stores ();
      return 0;
    }

  if (GET_MODE (mem) == BLKmode)
    width = MEM_SIZE (mem);
  else
    width = GET_MODE_SIZE (GET_MODE (mem));

  if (!endpoint_representable_p (offset, width))
    {
      clear_rhs_from_active_local_stores ();
      return 0;
    }

  if (known_eq (width, 0))
    return 0;

  if (group_id >= 0)
    {
      /* In the restrictive case where the base is a constant or the
	 frame pointer we can do global analysis.  */

      group_info *group
	= rtx_group_vec[group_id];
      tree expr = MEM_EXPR (mem);

      store_info = rtx_store_info_pool.allocate ();
      set_usage_bits (group, offset, width, expr);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " processing const base store gid=%d",
		   group_id);
	  print_range (dump_file, offset, width);
	  fprintf (dump_file, "\n");
	}
    }
  else
    {
      if (may_be_sp_based_p (XEXP (mem, 0)))
	insn_info->stack_pointer_based = true;
      insn_info->contains_cselib_groups = true;

      store_info = cse_store_info_pool.allocate ();
      group_id = -1;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " processing cselib store ");
	  print_range (dump_file, offset, width);
	  fprintf (dump_file, "\n");
	}
    }

  const_rhs = rhs = NULL_RTX;
  if (GET_CODE (body) == SET
      /* No place to keep the value after ra.  */
      && !reload_completed
      && (REG_P (SET_SRC (body))
	  || GET_CODE (SET_SRC (body)) == SUBREG
	  || CONSTANT_P (SET_SRC (body)))
      && !MEM_VOLATILE_P (mem)
      /* Sometimes the store and reload is used for truncation and
	 rounding.  */
      && !(FLOAT_MODE_P (GET_MODE (mem)) && (flag_float_store)))
    {
      rhs = SET_SRC (body);
      if (CONSTANT_P (rhs))
	const_rhs = rhs;
      else if (body == PATTERN (insn_info->insn))
	{
	  rtx tem = find_reg_note (insn_info->insn, REG_EQUAL, NULL_RTX);
	  if (tem && CONSTANT_P (XEXP (tem, 0)))
	    const_rhs = XEXP (tem, 0);
	}
      if (const_rhs == NULL_RTX && REG_P (rhs))
	{
	  rtx tem = cselib_expand_value_rtx (rhs, scratch, 5);

	  if (tem && CONSTANT_P (tem))
	    const_rhs = tem;
	}
    }

  /* Check to see if this store causes some other stores to be
     dead.  */
  ptr = active_local_stores;
  last = NULL;
  redundant_reason = NULL;
  mem = canon_rtx (mem);

  if (group_id < 0)
    mem_addr = base->val_rtx;
  else
    {
      group_info *group = rtx_group_vec[group_id];
      mem_addr = group->canon_base_addr;
    }
  if (maybe_ne (offset, 0))
    mem_addr = plus_constant (get_address_mode (mem), mem_addr, offset);

1531   while (ptr)
1532     {
1533       insn_info_t next = ptr->next_local_store;
1534       class store_info *s_info = ptr->store_rec;
1535       bool del = true;
1536 
1537       /* Skip the clobbers. We delete the active insn if this insn
1538 	 shadows the set.  To have been put on the active list, it
1539 	 has exactly on set. */
1540       while (!s_info->is_set)
1541 	s_info = s_info->next;
1542 
1543       if (s_info->group_id == group_id && s_info->cse_base == base)
1544 	{
1545 	  HOST_WIDE_INT i;
1546 	  if (dump_file && (dump_flags & TDF_DETAILS))
1547 	    {
1548 	      fprintf (dump_file, "    trying store in insn=%d gid=%d",
1549 		       INSN_UID (ptr->insn), s_info->group_id);
1550 	      print_range (dump_file, s_info->offset, s_info->width);
1551 	      fprintf (dump_file, "\n");
1552 	    }
1553 
1554 	  /* Even if PTR won't be eliminated as unneeded, if both
1555 	     PTR and this insn store the same constant value, we might
1556 	     eliminate this insn instead.  */
1557 	  if (s_info->const_rhs
1558 	      && const_rhs
1559 	      && known_subrange_p (offset, width,
1560 				   s_info->offset, s_info->width)
1561 	      && all_positions_needed_p (s_info, offset - s_info->offset,
1562 					 width)
1563 	      /* We can only remove the later store if the earlier one
1564 		 aliases at least all the accesses of the later one.  */
1565 	      && ((MEM_ALIAS_SET (mem) == MEM_ALIAS_SET (s_info->mem)
1566 		   || alias_set_subset_of (MEM_ALIAS_SET (mem),
1567 					   MEM_ALIAS_SET (s_info->mem)))
1568 		  && (!MEM_EXPR (s_info->mem)
1569 		      || refs_same_for_tbaa_p (MEM_EXPR (s_info->mem),
1570 					       MEM_EXPR (mem)))))
1571 	    {
1572 	      if (GET_MODE (mem) == BLKmode)
1573 		{
1574 		  if (GET_MODE (s_info->mem) == BLKmode
1575 		      && s_info->const_rhs == const_rhs)
1576 		    redundant_reason = ptr;
1577 		}
1578 	      else if (s_info->const_rhs == const0_rtx
1579 		       && const_rhs == const0_rtx)
1580 		redundant_reason = ptr;
1581 	      else
1582 		{
1583 		  rtx val;
1584 		  start_sequence ();
1585 		  val = get_stored_val (s_info, GET_MODE (mem), offset, width,
1586 					BLOCK_FOR_INSN (insn_info->insn),
1587 					true);
1588 		  if (get_insns () != NULL)
1589 		    val = NULL_RTX;
1590 		  end_sequence ();
1591 		  if (val && rtx_equal_p (val, const_rhs))
1592 		    redundant_reason = ptr;
1593 		}
1594 	    }
1595 
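	  /* A worked example of the range arithmetic below (invented
	     values): if S_INFO covers bytes [0, 8) and the new store
	     covers [4, 12), then begin_unneeded = 4 - 0 = 4 and
	     end_unneeded = MIN (4 + 8, 8) = 8, so byte positions
	     4..7 of S_INFO are marked unneeded.  */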
1596 	  HOST_WIDE_INT begin_unneeded, const_s_width, const_width;
1597 	  if (known_subrange_p (s_info->offset, s_info->width, offset, width))
1598 	    /* The new store touches every byte that S_INFO does.  */
1599 	    set_all_positions_unneeded (s_info);
1600 	  else if ((offset - s_info->offset).is_constant (&begin_unneeded)
1601 		   && s_info->width.is_constant (&const_s_width)
1602 		   && width.is_constant (&const_width))
1603 	    {
1604 	      HOST_WIDE_INT end_unneeded = begin_unneeded + const_width;
1605 	      begin_unneeded = MAX (begin_unneeded, 0);
1606 	      end_unneeded = MIN (end_unneeded, const_s_width);
1607 	      for (i = begin_unneeded; i < end_unneeded; ++i)
1608 		set_position_unneeded (s_info, i);
1609 	    }
1610 	  else
1611 	    {
1612 	      /* We don't know which parts of S_INFO are needed and
1613 		 which aren't, so invalidate the RHS.  */
1614 	      s_info->rhs = NULL;
1615 	      s_info->const_rhs = NULL;
1616 	    }
1617 	}
1618       else if (s_info->rhs)
1619 	/* Need to see if it is possible for this store to overwrite
1620 	   the value of store_info.  If it is, set the rhs to NULL to
1621 	   keep it from being used to remove a load.  */
1622 	{
1623 	  if (canon_output_dependence (s_info->mem, true,
1624 				       mem, GET_MODE (mem),
1625 				       mem_addr))
1626 	    {
1627 	      s_info->rhs = NULL;
1628 	      s_info->const_rhs = NULL;
1629 	    }
1630 	}
1631 
1632       /* An insn can be deleted if every position of every one of its
1633 	 s_infos is zero, i.e. no byte it stores is still needed.  */
1634       if (any_positions_needed_p (s_info))
1635 	del = false;
1636 
1637       if (del)
1638 	{
1639 	  insn_info_t insn_to_delete = ptr;
1640 
1641 	  active_local_stores_len--;
1642 	  if (last)
1643 	    last->next_local_store = ptr->next_local_store;
1644 	  else
1645 	    active_local_stores = ptr->next_local_store;
1646 
1647 	  if (!insn_to_delete->cannot_delete)
1648 	    delete_dead_store_insn (insn_to_delete);
1649 	}
1650       else
1651 	last = ptr;
1652 
1653       ptr = next;
1654     }
1655 
1656   /* Finish filling in the store_info.  */
1657   store_info->next = insn_info->store_rec;
1658   insn_info->store_rec = store_info;
1659   store_info->mem = mem;
1660   store_info->mem_addr = mem_addr;
1661   store_info->cse_base = base;
1662   HOST_WIDE_INT const_width;
1663   if (!width.is_constant (&const_width))
1664     {
1665       store_info->is_large = true;
1666       store_info->positions_needed.large.count = 0;
1667       store_info->positions_needed.large.bmap = NULL;
1668     }
1669   else if (const_width > HOST_BITS_PER_WIDE_INT)
1670     {
1671       store_info->is_large = true;
1672       store_info->positions_needed.large.count = 0;
1673       store_info->positions_needed.large.bmap = BITMAP_ALLOC (&dse_bitmap_obstack);
1674     }
1675   else
1676     {
1677       store_info->is_large = false;
1678       store_info->positions_needed.small_bitmask
1679 	= lowpart_bitmask (const_width);
1680     }
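  /* To illustrate the small-bitmask case above (example values only):
     for const_width == 4, lowpart_bitmask (4) yields 0xf, one set bit
     per byte position of the store, all initially needed.  */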
1681   store_info->group_id = group_id;
1682   store_info->offset = offset;
1683   store_info->width = width;
1684   store_info->is_set = GET_CODE (body) == SET;
1685   store_info->rhs = rhs;
1686   store_info->const_rhs = const_rhs;
1687   store_info->redundant_reason = redundant_reason;
1688 
1689   /* If this is a clobber, we return 0.  We will only be able to
1690      delete this insn if there is only one USED store, but we
1691      can use the clobber to delete other stores earlier.  */
1692   return store_info->is_set ? 1 : 0;
1693 }
1694 
1695 
1696 static void
1697 dump_insn_info (const char * start, insn_info_t insn_info)
1698 {
1699   fprintf (dump_file, "%s insn=%d %s\n", start,
1700 	   INSN_UID (insn_info->insn),
1701 	   insn_info->store_rec ? "has store" : "naked");
1702 }
1703 
1704 
1705 /* If the modes are different and the value's source and target do not
1706    line up, we need to extract the value from the lower part of the rhs
1707    of the store, shift it, and then put it into a form that can be shoved
1708    into the read_insn.  This function generates a right SHIFT of a
1709    value that is at least ACCESS_SIZE bytes wide, to be converted to
1710    READ_MODE.  The shift sequence is returned, or NULL if we failed to
1711    find a shift.  */
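
/* As an illustrative example (not from the sources): reading back the
   upper SImode half of a DImode store on a little-endian target gives
   SHIFT == 32 and ACCESS_SIZE == 8, so the loop in this function
   searches for the narrowest integer mode of at least 8 bytes in
   which a single lshiftrt by 32 is acceptably cheap.  */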
1712 
1713 static rtx
1714 find_shift_sequence (poly_int64 access_size,
1715 		     store_info *store_info,
1716 		     machine_mode read_mode,
1717 		     poly_int64 shift, bool speed, bool require_cst)
1718 {
1719   machine_mode store_mode = GET_MODE (store_info->mem);
1720   scalar_int_mode new_mode;
1721   rtx read_reg = NULL;
1722 
1723   /* Some machines like the x86 have shift insns for each size of
1724      operand.  Other machines like the ppc or the ia-64 may only have
1725      shift insns that shift values within 32 or 64 bit registers.
1726      This loop tries to find the smallest mode in which a shift that
1727      right justifies the value we want to read is available as a single
1728      insn on the machine.  */
1729 
1730   opt_scalar_int_mode new_mode_iter;
1731   FOR_EACH_MODE_IN_CLASS (new_mode_iter, MODE_INT)
1732     {
1733       rtx target, new_reg, new_lhs;
1734       rtx_insn *shift_seq, *insn;
1735       int cost;
1736 
1737       new_mode = new_mode_iter.require ();
1738       if (GET_MODE_BITSIZE (new_mode) > BITS_PER_WORD)
1739 	break;
1740       if (maybe_lt (GET_MODE_SIZE (new_mode), access_size))
1741 	continue;
1742 
1743       /* If a constant was stored into memory, try to simplify it here,
1744 	 otherwise the cost of the shift might preclude this optimization
1745 	 e.g. at -Os, even when no actual shift will be needed.  */
1746       if (store_info->const_rhs)
1747 	{
1748 	  poly_uint64 byte = subreg_lowpart_offset (new_mode, store_mode);
1749 	  rtx ret = simplify_subreg (new_mode, store_info->const_rhs,
1750 				     store_mode, byte);
1751 	  if (ret && CONSTANT_P (ret))
1752 	    {
1753 	      rtx shift_rtx = gen_int_shift_amount (new_mode, shift);
1754 	      ret = simplify_const_binary_operation (LSHIFTRT, new_mode,
1755 						     ret, shift_rtx);
1756 	      if (ret && CONSTANT_P (ret))
1757 		{
1758 		  byte = subreg_lowpart_offset (read_mode, new_mode);
1759 		  ret = simplify_subreg (read_mode, ret, new_mode, byte);
1760 		  if (ret && CONSTANT_P (ret)
1761 		      && (set_src_cost (ret, read_mode, speed)
1762 			  <= COSTS_N_INSNS (1)))
1763 		    return ret;
1764 		}
1765 	    }
1766 	}
1767 
1768       if (require_cst)
1769 	return NULL_RTX;
1770 
1771       /* Try a wider mode if truncating the store mode to NEW_MODE
1772 	 requires a real instruction.  */
1773       if (maybe_lt (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (store_mode))
1774 	  && !TRULY_NOOP_TRUNCATION_MODES_P (new_mode, store_mode))
1775 	continue;
1776 
1777       /* Also try a wider mode if the necessary punning is either not
1778 	 desirable or not possible.  */
1779       if (!CONSTANT_P (store_info->rhs)
1780 	  && !targetm.modes_tieable_p (new_mode, store_mode))
1781 	continue;
1782 
1783       new_reg = gen_reg_rtx (new_mode);
1784 
1785       start_sequence ();
1786 
1787       /* In theory we could also check for an ashr.  Ian Taylor knows
1788 	 of one dsp where the cost of these two was not the same.  But
1789 	 this really is a rare case anyway.  */
1790       target = expand_binop (new_mode, lshr_optab, new_reg,
1791 			     gen_int_shift_amount (new_mode, shift),
1792 			     new_reg, 1, OPTAB_DIRECT);
1793 
1794       shift_seq = get_insns ();
1795       end_sequence ();
1796 
1797       if (target != new_reg || shift_seq == NULL)
1798 	continue;
1799 
1800       cost = 0;
1801       for (insn = shift_seq; insn != NULL_RTX; insn = NEXT_INSN (insn))
1802 	if (INSN_P (insn))
1803 	  cost += insn_cost (insn, speed);
1804 
1805       /* The computation up to here is essentially independent
1806 	 of the arguments and could be precomputed.  It may
1807 	 not be worth doing so.  We could precompute if
1808 	 worthwhile or at least cache the results.  The result
1809 	 technically depends on both SHIFT and ACCESS_SIZE,
1810 	 but in practice the answer will depend only on ACCESS_SIZE.  */
1811 
1812       if (cost > COSTS_N_INSNS (1))
1813 	continue;
1814 
1815       new_lhs = extract_low_bits (new_mode, store_mode,
1816 				  copy_rtx (store_info->rhs));
1817       if (new_lhs == NULL_RTX)
1818 	continue;
1819 
1820       /* We found an acceptable shift.  Generate a move to
1821 	 take the value from the store and put it into the
1822 	 shift pseudo, then shift it, then generate another
1823 	 move to put it into the target of the read.  */
1824       emit_move_insn (new_reg, new_lhs);
1825       emit_insn (shift_seq);
1826       read_reg = extract_low_bits (read_mode, new_mode, new_reg);
1827       break;
1828     }
1829 
1830   return read_reg;
1831 }
1832 
1833 
1834 /* Callback for note_stores to find the hard regs set or clobbered by
1835    insn.  Data is a bitmap of the hardregs set so far.  */
1836 
1837 static void
1838 look_for_hardregs (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
1839 {
1840   bitmap regs_set = (bitmap) data;
1841 
1842   if (REG_P (x)
1843       && HARD_REGISTER_P (x))
1844     bitmap_set_range (regs_set, REGNO (x), REG_NREGS (x));
1845 }
1846 
1847 /* Helper function for replace_read and record_store.
1848    Attempt to return a value of mode READ_MODE stored in STORE_INFO,
1849    consisting of READ_WIDTH bytes starting from READ_OFFSET.  Return NULL
1850    if not successful.  If REQUIRE_CST is true, only return a constant.  */
1851 
1852 static rtx
1853 get_stored_val (store_info *store_info, machine_mode read_mode,
1854 		poly_int64 read_offset, poly_int64 read_width,
1855 		basic_block bb, bool require_cst)
1856 {
1857   machine_mode store_mode = GET_MODE (store_info->mem);
1858   poly_int64 gap;
1859   rtx read_reg;
1860 
1861   /* To get here the read is within the boundaries of the write so
1862      shift will never be negative.  Start out with the shift being in
1863      bytes.  */
1864   if (store_mode == BLKmode)
1865     gap = 0;
1866   else if (BYTES_BIG_ENDIAN)
1867     gap = ((store_info->offset + store_info->width)
1868 	   - (read_offset + read_width));
1869   else
1870     gap = read_offset - store_info->offset;
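  /* For example (illustrative): on a little-endian target, reading
     bytes [4, 8) of a store to bytes [0, 8) gives gap == 4, while on
     a big-endian target the same read gives gap == (0 + 8) - (4 + 4)
     == 0, because the read already sits at the low-order end.  */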
1871 
1872   if (gap.is_constant () && maybe_ne (gap, 0))
1873     {
1874       poly_int64 shift = gap * BITS_PER_UNIT;
1875       poly_int64 access_size = GET_MODE_SIZE (read_mode) + gap;
1876       read_reg = find_shift_sequence (access_size, store_info, read_mode,
1877 				      shift, optimize_bb_for_speed_p (bb),
1878 				      require_cst);
1879     }
1880   else if (store_mode == BLKmode)
1881     {
1882       /* The store is a memset (addr, const_val, const_size).  */
1883       gcc_assert (CONST_INT_P (store_info->rhs));
1884       scalar_int_mode int_store_mode;
1885       if (!int_mode_for_mode (read_mode).exists (&int_store_mode))
1886 	read_reg = NULL_RTX;
1887       else if (store_info->rhs == const0_rtx)
1888 	read_reg = extract_low_bits (read_mode, int_store_mode, const0_rtx);
1889       else if (GET_MODE_BITSIZE (int_store_mode) > HOST_BITS_PER_WIDE_INT
1890 	       || BITS_PER_UNIT >= HOST_BITS_PER_WIDE_INT)
1891 	read_reg = NULL_RTX;
1892       else
1893 	{
1894 	  unsigned HOST_WIDE_INT c
1895 	    = INTVAL (store_info->rhs)
1896 	      & ((HOST_WIDE_INT_1 << BITS_PER_UNIT) - 1);
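	  /* Splat the single memset byte across the host wide int by
	     repeated doubling; e.g. (illustratively) with 8-bit units
	     and c == 0xab: 0xab -> 0xabab -> 0xabababab -> ...  */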
1897 	  int shift = BITS_PER_UNIT;
1898 	  while (shift < HOST_BITS_PER_WIDE_INT)
1899 	    {
1900 	      c |= (c << shift);
1901 	      shift <<= 1;
1902 	    }
1903 	  read_reg = gen_int_mode (c, int_store_mode);
1904 	  read_reg = extract_low_bits (read_mode, int_store_mode, read_reg);
1905 	}
1906     }
1907   else if (store_info->const_rhs
1908 	   && (require_cst
1909 	       || GET_MODE_CLASS (read_mode) != GET_MODE_CLASS (store_mode)))
1910     read_reg = extract_low_bits (read_mode, store_mode,
1911 				 copy_rtx (store_info->const_rhs));
1912   else
1913     read_reg = extract_low_bits (read_mode, store_mode,
1914 				 copy_rtx (store_info->rhs));
1915   if (require_cst && read_reg && !CONSTANT_P (read_reg))
1916     read_reg = NULL_RTX;
1917   return read_reg;
1918 }
1919 
1920 /* Take a sequence of:
1921      A <- r1
1922      ...
1923      ... <- A
1924 
1925    and change it into:
1926      r2 <- r1
1927      A <- r1
1928      ...
1929      ... <- r2
1930 
1931    or
1932 
1933      r3 <- extract (r1)
1934      r3 <- r3 >> shift
1935      r2 <- extract (r3)
1936      ... <- r2
1937 
1938    or
1939 
1940      r2 <- extract (r1)
1941      ... <- r2
1942 
1943    depending on the alignment and the mode of the store and
1944    subsequent load.
1945 
1946 
1947    The STORE_INFO and STORE_INSN are for the store and READ_INFO
1948    and READ_INSN are for the read.  Return true if the replacement
1949    went ok.  */
1950 
1951 static bool
1952 replace_read (store_info *store_info, insn_info_t store_insn,
1953 	      read_info_t read_info, insn_info_t read_insn, rtx *loc,
1954 	      bitmap regs_live)
1955 {
1956   machine_mode store_mode = GET_MODE (store_info->mem);
1957   machine_mode read_mode = GET_MODE (read_info->mem);
1958   rtx_insn *insns, *this_insn;
1959   rtx read_reg;
1960   basic_block bb;
1961 
1962   if (!dbg_cnt (dse))
1963     return false;
1964 
1965   /* Create a sequence of instructions to set up the read register.
1966      This sequence goes immediately before the store and its result
1967      is read by the load.
1968 
1969      We need to keep this in perspective.  We are replacing a read
1970      with a sequence of insns, but the read will almost certainly be
1971      in cache, so it is not going to be an expensive one.  Thus, we
1972      are not willing to do a multi insn shift or worse a subroutine
1973      call to get rid of the read.  */
1974   if (dump_file && (dump_flags & TDF_DETAILS))
1975     fprintf (dump_file, "trying to replace %smode load in insn %d"
1976 	     " from %smode store in insn %d\n",
1977 	     GET_MODE_NAME (read_mode), INSN_UID (read_insn->insn),
1978 	     GET_MODE_NAME (store_mode), INSN_UID (store_insn->insn));
1979   start_sequence ();
1980   bb = BLOCK_FOR_INSN (read_insn->insn);
1981   read_reg = get_stored_val (store_info,
1982 			     read_mode, read_info->offset, read_info->width,
1983 			     bb, false);
1984   if (read_reg == NULL_RTX)
1985     {
1986       end_sequence ();
1987       if (dump_file && (dump_flags & TDF_DETAILS))
1988 	fprintf (dump_file, " -- could not extract bits of stored value\n");
1989       return false;
1990     }
1991   /* Force the value into a new register so that it won't be clobbered
1992      between the store and the load.  */
1993   read_reg = copy_to_mode_reg (read_mode, read_reg);
1994   insns = get_insns ();
1995   end_sequence ();
1996 
1997   if (insns != NULL_RTX)
1998     {
1999       /* Now we have to scan the set of new instructions to see if the
2000 	 sequence contains any sets of hardregs that happened to be
2001 	 live at this point.  For instance, this can happen if one of
2002 	 the insns sets the CC and the CC happened to be live at that
2003 	 point.  This does occasionally happen, see PR 37922.  */
2004       bitmap regs_set = BITMAP_ALLOC (&reg_obstack);
2005 
2006       for (this_insn = insns;
2007 	   this_insn != NULL_RTX; this_insn = NEXT_INSN (this_insn))
2008 	{
2009 	  if (insn_invalid_p (this_insn, false))
2010 	    {
2011 	      if (dump_file && (dump_flags & TDF_DETAILS))
2012 		{
2013 		  fprintf (dump_file, " -- replacing the loaded MEM with ");
2014 		  print_simple_rtl (dump_file, read_reg);
2015 		  fprintf (dump_file, " led to an invalid instruction\n");
2016 		}
2017 	      BITMAP_FREE (regs_set);
2018 	      return false;
2019 	    }
2020 	  note_stores (this_insn, look_for_hardregs, regs_set);
2021 	}
2022 
2023       bitmap_and_into (regs_set, regs_live);
2024       if (!bitmap_empty_p (regs_set))
2025 	{
2026 	  if (dump_file && (dump_flags & TDF_DETAILS))
2027 	    {
2028 	      fprintf (dump_file, "abandoning replacement because sequence "
2029 				  "clobbers live hardregs:");
2030 	      df_print_regset (dump_file, regs_set);
2031 	    }
2032 
2033 	  BITMAP_FREE (regs_set);
2034 	  return false;
2035 	}
2036       BITMAP_FREE (regs_set);
2037     }
2038 
2039   subrtx_iterator::array_type array;
2040   FOR_EACH_SUBRTX (iter, array, *loc, NONCONST)
2041     {
2042       const_rtx x = *iter;
2043       if (GET_RTX_CLASS (GET_CODE (x)) == RTX_AUTOINC)
2044 	{
2045 	  if (dump_file && (dump_flags & TDF_DETAILS))
2046 	    fprintf (dump_file, " -- replacing the MEM failed due to address "
2047 				"side-effects\n");
2048 	  return false;
2049 	}
2050     }
2051 
2052   if (validate_change (read_insn->insn, loc, read_reg, 0))
2053     {
2054       deferred_change *change = deferred_change_pool.allocate ();
2055 
2056       /* Insert this right before the store insn where it will be safe
2057 	 from later insns that might change it before the read.  */
2058       emit_insn_before (insns, store_insn->insn);
2059 
2060       /* And now for the kludge part: cselib croaks if you just
2061 	 return at this point.  There are two reasons for this:
2062 
2063 	 1) Cselib has an idea of how many pseudos there are and
2064 	 that does not include the new ones we just added.
2065 
2066 	 2) Cselib does not know about the move insn we added
2067 	 above the store_info, and there is no way to tell it
2068 	 about it, because it has "moved on".
2069 
2070 	 Problem (1) is fixable with a certain amount of engineering.
2071 	 Problem (2) requires starting the bb from scratch.  This
2072 	 could be expensive.
2073 
2074 	 So we are just going to have to lie.  The move/extraction
2075 	 insns are not really an issue, cselib did not see them.  But
2076 	 the use of the new pseudo read_insn is a real problem because
2077 	 cselib has not scanned this insn.  The way that we solve this
2078 	 problem is that we are just going to put the mem back for now
2079 	 and when we are finished with the block, we undo this.  We
2080 	 keep a table of mems to get rid of.  At the end of the basic
2081 	 block we can put them back.  */
2082 
2083       *loc = read_info->mem;
2084       change->next = deferred_change_list;
2085       deferred_change_list = change;
2086       change->loc = loc;
2087       change->reg = read_reg;
2088 
2089       /* Get rid of the read_info; from the point of view of the
2090 	 rest of dse, we pretend this read never happened.  */
2091       read_insn->read_rec = read_info->next;
2092       read_info_type_pool.remove (read_info);
2093       if (dump_file && (dump_flags & TDF_DETAILS))
2094 	{
2095 	  fprintf (dump_file, " -- replaced the loaded MEM with ");
2096 	  print_simple_rtl (dump_file, read_reg);
2097 	  fprintf (dump_file, "\n");
2098 	}
2099       return true;
2100     }
2101   else
2102     {
2103       if (dump_file && (dump_flags & TDF_DETAILS))
2104 	{
2105 	  fprintf (dump_file, " -- replacing the loaded MEM with ");
2106 	  print_simple_rtl (dump_file, read_reg);
2107 	  fprintf (dump_file, " led to an invalid instruction\n");
2108 	}
2109       return false;
2110     }
2111 }
2112 
2113 /* Check the address of MEM *LOC and kill any appropriate stores that may
2114    be active.  */
2115 
2116 static void
2117 check_mem_read_rtx (rtx *loc, bb_info_t bb_info)
2118 {
2119   rtx mem = *loc, mem_addr;
2120   insn_info_t insn_info;
2121   poly_int64 offset = 0;
2122   poly_int64 width = 0;
2123   cselib_val *base = NULL;
2124   int group_id;
2125   read_info_t read_info;
2126 
2127   insn_info = bb_info->last_insn;
2128 
2129   if ((MEM_ALIAS_SET (mem) == ALIAS_SET_MEMORY_BARRIER)
2130       || MEM_VOLATILE_P (mem))
2131     {
2132       if (crtl->stack_protect_guard
2133 	  && (MEM_EXPR (mem) == crtl->stack_protect_guard
2134 	      || (crtl->stack_protect_guard_decl
2135 		  && MEM_EXPR (mem) == crtl->stack_protect_guard_decl))
2136 	  && MEM_VOLATILE_P (mem))
2137 	{
2138 	  /* This is either the stack protector canary on the stack,
2139 	     which ought to be written by a MEM_VOLATILE_P store and
2140 	     thus shouldn't be deleted and is read at the very end of the
2141 	     function, but shouldn't conflict with any other store.
2142 	     Or it is __stack_chk_guard variable or TLS or whatever else
2143 	     MEM holding the canary value, which really shouldn't be
2144 	     ever modified in -fstack-protector* protected functions,
2145 	     otherwise the prologue store wouldn't match the epilogue
2146 	     check.  */
2147 	  if (dump_file && (dump_flags & TDF_DETAILS))
2148 	    fprintf (dump_file, " stack protector canary read ignored.\n");
2149 	  insn_info->cannot_delete = true;
2150 	  return;
2151 	}
2152 
2153       if (dump_file && (dump_flags & TDF_DETAILS))
2154 	fprintf (dump_file, " adding wild read, volatile or barrier.\n");
2155       add_wild_read (bb_info);
2156       insn_info->cannot_delete = true;
2157       return;
2158     }
2159 
2160   /* If it is reading readonly mem, then there can be no conflict with
2161      another write. */
2162   if (MEM_READONLY_P (mem))
2163     return;
2164 
2165   if (!canon_address (mem, &group_id, &offset, &base))
2166     {
2167       if (dump_file && (dump_flags & TDF_DETAILS))
2168 	fprintf (dump_file, " adding wild read, canon_address failure.\n");
2169       add_wild_read (bb_info);
2170       return;
2171     }
2172 
2173   if (GET_MODE (mem) == BLKmode)
2174     width = -1;
2175   else
2176     width = GET_MODE_SIZE (GET_MODE (mem));
2177 
2178   if (!endpoint_representable_p (offset, known_eq (width, -1) ? 1 : width))
2179     {
2180       if (dump_file && (dump_flags & TDF_DETAILS))
2181 	fprintf (dump_file, " adding wild read, due to overflow.\n");
2182       add_wild_read (bb_info);
2183       return;
2184     }
2185 
2186   read_info = read_info_type_pool.allocate ();
2187   read_info->group_id = group_id;
2188   read_info->mem = mem;
2189   read_info->offset = offset;
2190   read_info->width = width;
2191   read_info->next = insn_info->read_rec;
2192   insn_info->read_rec = read_info;
2193   if (group_id < 0)
2194     mem_addr = base->val_rtx;
2195   else
2196     {
2197       group_info *group = rtx_group_vec[group_id];
2198       mem_addr = group->canon_base_addr;
2199     }
2200   if (maybe_ne (offset, 0))
2201     mem_addr = plus_constant (get_address_mode (mem), mem_addr, offset);
2202 
2203   if (group_id >= 0)
2204     {
2205       /* This is the restricted case where the base is a constant or
2206 	 the frame pointer and offset is a constant.  */
2207       insn_info_t i_ptr = active_local_stores;
2208       insn_info_t last = NULL;
2209 
2210       if (dump_file && (dump_flags & TDF_DETAILS))
2211 	{
2212 	  if (!known_size_p (width))
2213 	    fprintf (dump_file, " processing const load gid=%d[BLK]\n",
2214 		     group_id);
2215 	  else
2216 	    {
2217 	      fprintf (dump_file, " processing const load gid=%d", group_id);
2218 	      print_range (dump_file, offset, width);
2219 	      fprintf (dump_file, "\n");
2220 	    }
2221 	}
2222 
2223       while (i_ptr)
2224 	{
2225 	  bool remove = false;
2226 	  store_info *store_info = i_ptr->store_rec;
2227 
2228 	  /* Skip the clobbers.  */
2229 	  while (!store_info->is_set)
2230 	    store_info = store_info->next;
2231 
2232 	  /* There are three cases here.  */
2233 	  if (store_info->group_id < 0)
2234 	    /* We have a cselib store followed by a read from a
2235 	       const base. */
2236 	    remove
2237 	      = canon_true_dependence (store_info->mem,
2238 				       GET_MODE (store_info->mem),
2239 				       store_info->mem_addr,
2240 				       mem, mem_addr);
2241 
2242 	  else if (group_id == store_info->group_id)
2243 	    {
2244 	      /* This is a block mode load.  We may get lucky and
2245 		 canon_true_dependence may save the day.  */
2246 	      if (!known_size_p (width))
2247 		remove
2248 		  = canon_true_dependence (store_info->mem,
2249 					   GET_MODE (store_info->mem),
2250 					   store_info->mem_addr,
2251 					   mem, mem_addr);
2252 
2253 	      /* If this read is just reading back something that we just
2254 		 stored, rewrite the read.  */
2255 	      else
2256 		{
2257 		  if (store_info->rhs
2258 		      && known_subrange_p (offset, width, store_info->offset,
2259 					   store_info->width)
2260 		      && all_positions_needed_p (store_info,
2261 						 offset - store_info->offset,
2262 						 width)
2263 		      && replace_read (store_info, i_ptr, read_info,
2264 				       insn_info, loc, bb_info->regs_live))
2265 		    return;
2266 
2267 		  /* The bases are the same, just see if the offsets
2268 		     could overlap.  */
2269 		  if (ranges_maybe_overlap_p (offset, width,
2270 					      store_info->offset,
2271 					      store_info->width))
2272 		    remove = true;
2273 		}
2274 	    }
2275 
2276 	  /* else
2277 	     The else case that is missing here is that the
2278 	     bases are constant but different.  There is nothing
2279 	     to do here because there is no overlap.  */
2280 
2281 	  if (remove)
2282 	    {
2283 	      if (dump_file && (dump_flags & TDF_DETAILS))
2284 		dump_insn_info ("removing from active", i_ptr);
2285 
2286 	      active_local_stores_len--;
2287 	      if (last)
2288 		last->next_local_store = i_ptr->next_local_store;
2289 	      else
2290 		active_local_stores = i_ptr->next_local_store;
2291 	    }
2292 	  else
2293 	    last = i_ptr;
2294 	  i_ptr = i_ptr->next_local_store;
2295 	}
2296     }
2297   else
2298     {
2299       insn_info_t i_ptr = active_local_stores;
2300       insn_info_t last = NULL;
2301       if (dump_file && (dump_flags & TDF_DETAILS))
2302 	{
2303 	  fprintf (dump_file, " processing cselib load mem:");
2304 	  print_inline_rtx (dump_file, mem, 0);
2305 	  fprintf (dump_file, "\n");
2306 	}
2307 
2308       while (i_ptr)
2309 	{
2310 	  bool remove = false;
2311 	  store_info *store_info = i_ptr->store_rec;
2312 
2313 	  if (dump_file && (dump_flags & TDF_DETAILS))
2314 	    fprintf (dump_file, " processing cselib load against insn %d\n",
2315 		     INSN_UID (i_ptr->insn));
2316 
2317 	  /* Skip the clobbers.  */
2318 	  while (!store_info->is_set)
2319 	    store_info = store_info->next;
2320 
2321 	  /* If this read is just reading back something that we just
2322 	     stored, rewrite the read.  */
2323 	  if (store_info->rhs
2324 	      && store_info->group_id == -1
2325 	      && store_info->cse_base == base
2326 	      && known_subrange_p (offset, width, store_info->offset,
2327 				   store_info->width)
2328 	      && all_positions_needed_p (store_info,
2329 					 offset - store_info->offset, width)
2330 	      && replace_read (store_info, i_ptr,  read_info, insn_info, loc,
2331 			       bb_info->regs_live))
2332 	    return;
2333 
2334 	  remove = canon_true_dependence (store_info->mem,
2335 					  GET_MODE (store_info->mem),
2336 					  store_info->mem_addr,
2337 					  mem, mem_addr);
2338 
2339 	  if (remove)
2340 	    {
2341 	      if (dump_file && (dump_flags & TDF_DETAILS))
2342 		dump_insn_info ("removing from active", i_ptr);
2343 
2344 	      active_local_stores_len--;
2345 	      if (last)
2346 		last->next_local_store = i_ptr->next_local_store;
2347 	      else
2348 		active_local_stores = i_ptr->next_local_store;
2349 	    }
2350 	  else
2351 	    last = i_ptr;
2352 	  i_ptr = i_ptr->next_local_store;
2353 	}
2354     }
2355 }
2356 
2357 /* A note_uses callback in which DATA points to the BB_INFO for
2358    check_mem_read_rtx.  Apply check_mem_read_rtx to every MEM found
2359    in any part of *LOC.  */
2360 
2361 static void
2362 check_mem_read_use (rtx *loc, void *data)
2363 {
2364   subrtx_ptr_iterator::array_type array;
2365   FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
2366     {
2367       rtx *loc = *iter;
2368       if (MEM_P (*loc))
2369 	check_mem_read_rtx (loc, (bb_info_t) data);
2370     }
2371 }
2372 
2373 
2374 /* Get arguments passed to CALL_INSN.  Return TRUE if successful.
2375    So far it only handles arguments passed in registers.  */
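
/* An illustrative sketch: for a call to memset (p, 0, 32) whose three
   arguments are all passed in registers, ARGS ends up holding the rtx
   for P plus, where cselib can prove them constant, (const_int 0) and
   (const_int 32).  */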
2376 
2377 static bool
2378 get_call_args (rtx call_insn, tree fn, rtx *args, int nargs)
2379 {
2380   CUMULATIVE_ARGS args_so_far_v;
2381   cumulative_args_t args_so_far;
2382   tree arg;
2383   int idx;
2384 
2385   INIT_CUMULATIVE_ARGS (args_so_far_v, TREE_TYPE (fn), NULL_RTX, 0, 3);
2386   args_so_far = pack_cumulative_args (&args_so_far_v);
2387 
2388   arg = TYPE_ARG_TYPES (TREE_TYPE (fn));
2389   for (idx = 0;
2390        arg != void_list_node && idx < nargs;
2391        arg = TREE_CHAIN (arg), idx++)
2392     {
2393       scalar_int_mode mode;
2394       rtx reg, link, tmp;
2395 
2396       if (!is_int_mode (TYPE_MODE (TREE_VALUE (arg)), &mode))
2397 	return false;
2398 
2399       function_arg_info arg (mode, /*named=*/true);
2400       reg = targetm.calls.function_arg (args_so_far, arg);
2401       if (!reg || !REG_P (reg) || GET_MODE (reg) != mode)
2402 	return false;
2403 
2404       for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
2405 	   link;
2406 	   link = XEXP (link, 1))
2407 	if (GET_CODE (XEXP (link, 0)) == USE)
2408 	  {
2409 	    scalar_int_mode arg_mode;
2410 	    args[idx] = XEXP (XEXP (link, 0), 0);
2411 	    if (REG_P (args[idx])
2412 		&& REGNO (args[idx]) == REGNO (reg)
2413 		&& (GET_MODE (args[idx]) == mode
2414 		    || (is_int_mode (GET_MODE (args[idx]), &arg_mode)
2415 			&& (GET_MODE_SIZE (arg_mode) <= UNITS_PER_WORD)
2416 			&& (GET_MODE_SIZE (arg_mode) > GET_MODE_SIZE (mode)))))
2417 	      break;
2418 	  }
2419       if (!link)
2420 	return false;
2421 
2422       tmp = cselib_expand_value_rtx (args[idx], scratch, 5);
2423       if (GET_MODE (args[idx]) != mode)
2424 	{
2425 	  if (!tmp || !CONST_INT_P (tmp))
2426 	    return false;
2427 	  tmp = gen_int_mode (INTVAL (tmp), mode);
2428 	}
2429       if (tmp)
2430 	args[idx] = tmp;
2431 
2432       targetm.calls.function_arg_advance (args_so_far, arg);
2433     }
2434   if (arg != void_list_node || idx != nargs)
2435     return false;
2436   return true;
2437 }
2438 
2439 /* Return a bitmap of the fixed registers contained in IN.  */
2440 
2441 static bitmap
2442 copy_fixed_regs (const_bitmap in)
2443 {
2444   bitmap ret;
2445 
2446   ret = ALLOC_REG_SET (NULL);
2447   bitmap_and (ret, in, bitmap_view<HARD_REG_SET> (fixed_reg_set));
2448   return ret;
2449 }
2450 
2451 /* Apply record_store to all candidate stores in INSN.  Mark INSN
2452    as not deletable if some part of it is not a candidate store and
2453    assigns to a non-register target.  */
2454 
2455 static void
2456 scan_insn (bb_info_t bb_info, rtx_insn *insn, int max_active_local_stores)
2457 {
2458   rtx body;
2459   insn_info_type *insn_info = insn_info_type_pool.allocate ();
2460   int mems_found = 0;
2461   memset (insn_info, 0, sizeof (struct insn_info_type));
2462 
2463   if (dump_file && (dump_flags & TDF_DETAILS))
2464     fprintf (dump_file, "\n**scanning insn=%d\n",
2465 	     INSN_UID (insn));
2466 
2467   insn_info->prev_insn = bb_info->last_insn;
2468   insn_info->insn = insn;
2469   bb_info->last_insn = insn_info;
2470 
2471   if (DEBUG_INSN_P (insn))
2472     {
2473       insn_info->cannot_delete = true;
2474       return;
2475     }
2476 
2477   /* Look at all of the uses in the insn.  */
2478   note_uses (&PATTERN (insn), check_mem_read_use, bb_info);
2479 
2480   if (CALL_P (insn))
2481     {
2482       bool const_call;
2483       rtx call, sym;
2484       tree memset_call = NULL_TREE;
2485 
2486       insn_info->cannot_delete = true;
2487 
2488       /* Const functions cannot do anything bad, i.e. read memory;
2489 	 however, they can read their parameters, which may have
2490 	 been pushed onto the stack.
2491 	 memset and bzero don't read memory either.  */
2492       const_call = RTL_CONST_CALL_P (insn);
2493       if (!const_call
2494 	  && (call = get_call_rtx_from (insn))
2495 	  && (sym = XEXP (XEXP (call, 0), 0))
2496 	  && GET_CODE (sym) == SYMBOL_REF
2497 	  && SYMBOL_REF_DECL (sym)
2498 	  && TREE_CODE (SYMBOL_REF_DECL (sym)) == FUNCTION_DECL
2499 	  && fndecl_built_in_p (SYMBOL_REF_DECL (sym), BUILT_IN_MEMSET))
2500 	memset_call = SYMBOL_REF_DECL (sym);
2501 
2502       if (const_call || memset_call)
2503 	{
2504 	  insn_info_t i_ptr = active_local_stores;
2505 	  insn_info_t last = NULL;
2506 
2507 	  if (dump_file && (dump_flags & TDF_DETAILS))
2508 	    fprintf (dump_file, "%s call %d\n",
2509 		     const_call ? "const" : "memset", INSN_UID (insn));
2510 
2511 	  /* See the head comment of the frame_read field.  */
2512 	  if (reload_completed
2513 	      /* Tail calls are storing their arguments using the
2514 		 arg pointer.  If it is a frame pointer on the target,
2515 		 even before reload we need to kill frame pointer based
2516 		 stores.  */
2517 	      || (SIBLING_CALL_P (insn)
2518 		  && HARD_FRAME_POINTER_IS_ARG_POINTER))
2519 	    insn_info->frame_read = true;
2520 
2521 	  /* Loop over the active stores and remove those which are
2522 	     killed by the const function call.  */
2523 	  while (i_ptr)
2524 	    {
2525 	      bool remove_store = false;
2526 
2527 	      /* The stack pointer based stores are always killed.  */
2528 	      if (i_ptr->stack_pointer_based)
2529 	        remove_store = true;
2530 
2531 	      /* If the frame is read, the frame related stores are killed.  */
2532 	      else if (insn_info->frame_read)
2533 		{
2534 		  store_info *store_info = i_ptr->store_rec;
2535 
2536 		  /* Skip the clobbers.  */
2537 		  while (!store_info->is_set)
2538 		    store_info = store_info->next;
2539 
2540 		  if (store_info->group_id >= 0
2541 		      && rtx_group_vec[store_info->group_id]->frame_related)
2542 		    remove_store = true;
2543 		}
2544 
2545 	      if (remove_store)
2546 		{
2547 		  if (dump_file && (dump_flags & TDF_DETAILS))
2548 		    dump_insn_info ("removing from active", i_ptr);
2549 
2550 		  active_local_stores_len--;
2551 		  if (last)
2552 		    last->next_local_store = i_ptr->next_local_store;
2553 		  else
2554 		    active_local_stores = i_ptr->next_local_store;
2555 		}
2556 	      else
2557 		last = i_ptr;
2558 
2559 	      i_ptr = i_ptr->next_local_store;
2560 	    }
2561 
2562 	  if (memset_call)
2563 	    {
2564 	      rtx args[3];
2565 	      if (get_call_args (insn, memset_call, args, 3)
2566 		  && CONST_INT_P (args[1])
2567 		  && CONST_INT_P (args[2])
2568 		  && INTVAL (args[2]) > 0)
2569 		{
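		  /* Model the memset (illustratively, memset (p, 0, 16))
		     as a BLKmode store (set (mem:BLK p) (const_int 0))
		     of size 16, which record_store can then treat like
		     any other candidate store.  */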
2570 		  rtx mem = gen_rtx_MEM (BLKmode, args[0]);
2571 		  set_mem_size (mem, INTVAL (args[2]));
2572 		  body = gen_rtx_SET (mem, args[1]);
2573 		  mems_found += record_store (body, bb_info);
2574 		  if (dump_file && (dump_flags & TDF_DETAILS))
2575 		    fprintf (dump_file, "handling memset as BLKmode store\n");
2576 		  if (mems_found == 1)
2577 		    {
2578 		      if (active_local_stores_len++ >= max_active_local_stores)
2579 			{
2580 			  active_local_stores_len = 1;
2581 			  active_local_stores = NULL;
2582 			}
2583 		      insn_info->fixed_regs_live
2584 			= copy_fixed_regs (bb_info->regs_live);
2585 		      insn_info->next_local_store = active_local_stores;
2586 		      active_local_stores = insn_info;
2587 		    }
2588 		}
2589 	      else
2590 		clear_rhs_from_active_local_stores ();
2591 	    }
2592 	}
2593       else if (SIBLING_CALL_P (insn)
2594 	       && (reload_completed || HARD_FRAME_POINTER_IS_ARG_POINTER))
2595 	/* Arguments for a sibling call that are pushed to memory are passed
2596 	   using the incoming argument pointer of the current function.  After
2597 	   reload that might be (and likely is) frame pointer based.  And, if
2598 	   it is a frame pointer on the target, even before reload we need to
2599 	   kill frame pointer based stores.  */
2600 	add_wild_read (bb_info);
2601       else
2602 	/* Every other call, including pure functions, may read any memory
2603            that is not relative to the frame.  */
2604         add_non_frame_wild_read (bb_info);
2605 
2606       return;
2607     }
2608 
2609   /* Assuming that there are sets in these insns, we cannot delete
2610      them.  */
2611   if ((GET_CODE (PATTERN (insn)) == CLOBBER)
2612       || volatile_refs_p (PATTERN (insn))
2613       || (!cfun->can_delete_dead_exceptions && !insn_nothrow_p (insn))
2614       || (RTX_FRAME_RELATED_P (insn))
2615       || find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX))
2616     insn_info->cannot_delete = true;
2617 
2618   body = PATTERN (insn);
2619   if (GET_CODE (body) == PARALLEL)
2620     {
2621       int i;
2622       for (i = 0; i < XVECLEN (body, 0); i++)
2623 	mems_found += record_store (XVECEXP (body, 0, i), bb_info);
2624     }
2625   else
2626     mems_found += record_store (body, bb_info);
2627 
2628   if (dump_file && (dump_flags & TDF_DETAILS))
2629     fprintf (dump_file, "mems_found = %d, cannot_delete = %s\n",
2630 	     mems_found, insn_info->cannot_delete ? "true" : "false");
2631 
2632   /* If we found exactly one store to a mem, add the insn into
2633      active_local_stores so that it can be locally deleted if found dead,
2634      or used for replace_read and redundant constant store elimination.
2635      Otherwise mark it as cannot delete.  This simplifies later processing.  */
2636   if (mems_found == 1)
2637     {
2638       if (active_local_stores_len++ >= max_active_local_stores)
2639 	{
2640 	  active_local_stores_len = 1;
2641 	  active_local_stores = NULL;
2642 	}
2643       insn_info->fixed_regs_live = copy_fixed_regs (bb_info->regs_live);
2644       insn_info->next_local_store = active_local_stores;
2645       active_local_stores = insn_info;
2646     }
2647   else
2648     insn_info->cannot_delete = true;
2649 }
2650 
2651 
2652 /* Remove BASE from the set of active_local_stores.  This is a
2653    callback from cselib that is used to get rid of the stores in
2654    active_local_stores.  */
2655 
2656 static void
2657 remove_useless_values (cselib_val *base)
2658 {
2659   insn_info_t insn_info = active_local_stores;
2660   insn_info_t last = NULL;
2661 
2662   while (insn_info)
2663     {
2664       store_info *store_info = insn_info->store_rec;
2665       bool del = false;
2666 
2667       /* If ANY of the store_infos match the cselib group that is
2668 	 being deleted, then the insn cannot be deleted.  */
2669       while (store_info)
2670 	{
2671 	  if ((store_info->group_id == -1)
2672 	      && (store_info->cse_base == base))
2673 	    {
2674 	      del = true;
2675 	      break;
2676 	    }
2677 	  store_info = store_info->next;
2678 	}
2679 
2680       if (del)
2681 	{
2682 	  active_local_stores_len--;
2683 	  if (last)
2684 	    last->next_local_store = insn_info->next_local_store;
2685 	  else
2686 	    active_local_stores = insn_info->next_local_store;
2687 	  free_store_info (insn_info);
2688 	}
2689       else
2690 	last = insn_info;
2691 
2692       insn_info = insn_info->next_local_store;
2693     }
2694 }
2695 
2696 
2697 /* Do all of step 1.  */
2698 
2699 static void
2700 dse_step1 (void)
2701 {
2702   basic_block bb;
2703   bitmap regs_live = BITMAP_ALLOC (&reg_obstack);
2704 
2705   cselib_init (0);
2706   all_blocks = BITMAP_ALLOC (NULL);
2707   bitmap_set_bit (all_blocks, ENTRY_BLOCK);
2708   bitmap_set_bit (all_blocks, EXIT_BLOCK);
2709 
2710   /* For -O1 reduce the maximum number of active local stores for RTL DSE
2711      since this can consume huge amounts of memory (PR89115).  */
2712   int max_active_local_stores = param_max_dse_active_local_stores;
2713   if (optimize < 2)
2714     max_active_local_stores /= 10;
2715 
2716   FOR_ALL_BB_FN (bb, cfun)
2717     {
2718       insn_info_t ptr;
2719       bb_info_t bb_info = dse_bb_info_type_pool.allocate ();
2720 
2721       memset (bb_info, 0, sizeof (dse_bb_info_type));
2722       bitmap_set_bit (all_blocks, bb->index);
2723       bb_info->regs_live = regs_live;
2724 
2725       bitmap_copy (regs_live, DF_LR_IN (bb));
2726       df_simulate_initialize_forwards (bb, regs_live);
2727 
2728       bb_table[bb->index] = bb_info;
2729       cselib_discard_hook = remove_useless_values;
2730 
2731       if (bb->index >= NUM_FIXED_BLOCKS)
2732 	{
2733 	  rtx_insn *insn;
2734 
2735 	  active_local_stores = NULL;
2736 	  active_local_stores_len = 0;
2737 	  cselib_clear_table ();
2738 
2739 	  /* Scan the insns.  */
2740 	  FOR_BB_INSNS (bb, insn)
2741 	    {
2742 	      if (INSN_P (insn))
2743 		scan_insn (bb_info, insn, max_active_local_stores);
2744 	      cselib_process_insn (insn);
2745 	      if (INSN_P (insn))
2746 		df_simulate_one_insn_forwards (bb, insn, regs_live);
2747 	    }
2748 
2749 	  /* This is something of a hack, because the global algorithm
2750 	     is supposed to take care of the case where stores go dead
2751 	     at the end of the function.  However, the global
2752 	     algorithm must take a more conservative view of block
2753 	     mode reads than the local algorithm does.  So to get the case
2754 	     where you have a store to the frame followed by a non
2755 	     overlapping block mode read, we look at the active local
2756 	     stores at the end of the function and delete all of the
2757 	     frame and spill based ones.  */
2758 	  if (stores_off_frame_dead_at_return
2759 	      && (EDGE_COUNT (bb->succs) == 0
2760 		  || (single_succ_p (bb)
2761 		      && single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (cfun)
2762 		      && ! crtl->calls_eh_return)))
2763 	    {
2764 	      insn_info_t i_ptr = active_local_stores;
2765 	      while (i_ptr)
2766 		{
2767 		  store_info *store_info = i_ptr->store_rec;
2768 
2769 		  /* Skip the clobbers.  */
2770 		  while (!store_info->is_set)
2771 		    store_info = store_info->next;
2772 		  if (store_info->group_id >= 0)
2773 		    {
2774 		      group_info *group = rtx_group_vec[store_info->group_id];
2775 		      if (group->frame_related && !i_ptr->cannot_delete)
2776 			delete_dead_store_insn (i_ptr);
2777 		    }
2778 
2779 		  i_ptr = i_ptr->next_local_store;
2780 		}
2781 	    }
2782 
2783 	  /* Get rid of the loads that were discovered in
2784 	     replace_read.  Cselib is finished with this block.  */
2785 	  while (deferred_change_list)
2786 	    {
2787 	      deferred_change *next = deferred_change_list->next;
2788 
2789 	      /* There is no reason to validate this change.  That was
2790 		 done earlier.  */
2791 	      *deferred_change_list->loc = deferred_change_list->reg;
2792 	      deferred_change_pool.remove (deferred_change_list);
2793 	      deferred_change_list = next;
2794 	    }
2795 
2796 	  /* Get rid of all of the cselib based store_infos in this
2797 	     block and mark the containing insns as not being
2798 	     deletable.  */
2799 	  ptr = bb_info->last_insn;
2800 	  while (ptr)
2801 	    {
2802 	      if (ptr->contains_cselib_groups)
2803 		{
2804 		  store_info *s_info = ptr->store_rec;
2805 		  while (s_info && !s_info->is_set)
2806 		    s_info = s_info->next;
2807 		  if (s_info
2808 		      && s_info->redundant_reason
2809 		      && s_info->redundant_reason->insn
2810 		      && !ptr->cannot_delete)
2811 		    {
2812 		      if (dump_file && (dump_flags & TDF_DETAILS))
2813 			fprintf (dump_file, "Locally deleting insn %d "
2814 					    "because insn %d stores the "
2815 					    "same value and couldn't be "
2816 					    "eliminated\n",
2817 				 INSN_UID (ptr->insn),
2818 				 INSN_UID (s_info->redundant_reason->insn));
2819 		      delete_dead_store_insn (ptr);
2820 		    }
2821 		  free_store_info (ptr);
2822 		}
2823 	      else
2824 		{
2825 		  store_info *s_info;
2826 
2827 		  /* At least free the positions_needed bitmaps.  */
2828 		  for (s_info = ptr->store_rec; s_info; s_info = s_info->next)
2829 		    if (s_info->is_large)
2830 		      {
2831 			BITMAP_FREE (s_info->positions_needed.large.bmap);
2832 			s_info->is_large = false;
2833 		      }
2834 		}
2835 	      ptr = ptr->prev_insn;
2836 	    }
2837 
2838 	  cse_store_info_pool.release ();
2839 	}
2840       bb_info->regs_live = NULL;
2841     }
2842 
2843   BITMAP_FREE (regs_live);
2844   cselib_finish ();
2845   rtx_group_table->empty ();
2846 }
2847 
2848 
2849 /*----------------------------------------------------------------------------
2850    Second step.
2851 
2852    Assign each byte position in the stores that we are going to
2853    analyze globally to a position in the bitmaps.  Returns true if
2854    there are any bit positions assigned.
2855 ----------------------------------------------------------------------------*/
2856 
2857 static void
2858 dse_step2_init (void)
2859 {
2860   unsigned int i;
2861   group_info *group;
2862 
2863   FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
2864     {
2865       /* For all non stack related bases, we only consider a store to
2866 	 be deletable if there are two or more stores for that
2867 	 position.  This is because it takes one store to make the
2868 	 other store redundant.  However, for the stores that are
2869 	 stack related, we consider them if there is only one store
2870 	 for the position.  We do this because the stack related
2871 	 stores can be deleted if there is no read between them and
2872 	 the end of the function.
2873 
2874 	 To make this work in the current framework, we take the stack
2875 	 related bases and add all of the bits from store1 into store2.
2876 	 This has the effect of making them eligible even if there is
2877 	 only one store.  */
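
      /* Example (illustrative): a frame related group whose only store
	 writes byte 8 has bit 8 set in store1_p but clear in store2_p;
	 the IOR below copies the bit into store2_p, so dse_step2 still
	 assigns that byte a global bitmap position.  */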
2878 
2879       if (stores_off_frame_dead_at_return && group->frame_related)
2880 	{
2881 	  bitmap_ior_into (group->store2_n, group->store1_n);
2882 	  bitmap_ior_into (group->store2_p, group->store1_p);
2883 	  if (dump_file && (dump_flags & TDF_DETAILS))
2884 	    fprintf (dump_file, "group %d is frame related ", i);
2885 	}
2886 
2887       group->offset_map_size_n++;
2888       group->offset_map_n = XOBNEWVEC (&dse_obstack, int,
2889 				       group->offset_map_size_n);
2890       group->offset_map_size_p++;
2891       group->offset_map_p = XOBNEWVEC (&dse_obstack, int,
2892 				       group->offset_map_size_p);
2893       group->process_globally = false;
2894       if (dump_file && (dump_flags & TDF_DETAILS))
2895 	{
2896 	  fprintf (dump_file, "group %d(%d+%d): ", i,
2897 		   (int)bitmap_count_bits (group->store2_n),
2898 		   (int)bitmap_count_bits (group->store2_p));
2899 	  bitmap_print (dump_file, group->store2_n, "n ", " ");
2900 	  bitmap_print (dump_file, group->store2_p, "p ", "\n");
2901 	}
2902     }
2903 }
2904 
2905 
2906 /* Init the offset tables.  */
2907 
2908 static bool
2909 dse_step2 (void)
2910 {
2911   unsigned int i;
2912   group_info *group;
2913   /* Position 0 is never assigned because 0 is used in the offset
2914      maps to mean "no position".  */
2915   current_position = 1;
2916   FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
2917     {
2918       bitmap_iterator bi;
2919       unsigned int j;
2920 
2921       memset (group->offset_map_n, 0, sizeof (int) * group->offset_map_size_n);
2922       memset (group->offset_map_p, 0, sizeof (int) * group->offset_map_size_p);
2923       bitmap_clear (group->group_kill);
2924 
2925       EXECUTE_IF_SET_IN_BITMAP (group->store2_n, 0, j, bi)
2926 	{
2927 	  bitmap_set_bit (group->group_kill, current_position);
2928           if (bitmap_bit_p (group->escaped_n, j))
2929 	    bitmap_set_bit (kill_on_calls, current_position);
2930 	  group->offset_map_n[j] = current_position++;
2931 	  group->process_globally = true;
2932 	}
2933       EXECUTE_IF_SET_IN_BITMAP (group->store2_p, 0, j, bi)
2934 	{
2935 	  bitmap_set_bit (group->group_kill, current_position);
2936           if (bitmap_bit_p (group->escaped_p, j))
2937 	    bitmap_set_bit (kill_on_calls, current_position);
2938 	  group->offset_map_p[j] = current_position++;
2939 	  group->process_globally = true;
2940 	}
2941     }
2942   return current_position != 1;
2943 }
2944 
2945 
2946 
2947 /*----------------------------------------------------------------------------
2948   Third step.
2949 
2950   Build the bit vectors for the transfer functions.
2951 ----------------------------------------------------------------------------*/
2952 
2953 
2954 /* Look up the bitmap index for OFFSET in GROUP_INFO.  If it is not
2955    there, return 0.  */
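
/* For instance (illustrative values): with offset_map_p[8] == 5 and
   offset_map_n[4] == 7, get_bitmap_index (group, 8) returns 5 and
   get_bitmap_index (group, -4) returns 7; any offset outside the
   mapped ranges yields 0, meaning "not tracked globally".  */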
2956 
2957 static int
2958 get_bitmap_index (group_info *group_info, HOST_WIDE_INT offset)
2959 {
2960   if (offset < 0)
2961     {
2962       HOST_WIDE_INT offset_p = -offset;
2963       if (offset_p >= group_info->offset_map_size_n)
2964 	return 0;
2965       return group_info->offset_map_n[offset_p];
2966     }
2967   else
2968     {
2969       if (offset >= group_info->offset_map_size_p)
2970 	return 0;
2971       return group_info->offset_map_p[offset];
2972     }
2973 }
2974 
2975 
2976 /* Process the STORE_INFOs into the bitmaps into GEN and KILL.  KILL
2977    may be NULL. */
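
/* A worked example (illustrative): for a store covering bytes [4, 8)
   of a globally processed group, each offset 4..7 that has a nonzero
   bitmap index is set in GEN and cleared in KILL; offsets that were
   never assigned a position (index 0) are simply skipped.  */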
2978 
2979 static void
2980 scan_stores (store_info *store_info, bitmap gen, bitmap kill)
2981 {
2982   while (store_info)
2983     {
2984       HOST_WIDE_INT i, offset, width;
2985       group_info *group_info
2986 	= rtx_group_vec[store_info->group_id];
2987       /* We can (conservatively) ignore stores whose bounds aren't known;
2988 	 they simply don't generate new global dse opportunities.  */
2989       if (group_info->process_globally
2990 	  && store_info->offset.is_constant (&offset)
2991 	  && store_info->width.is_constant (&width))
2992 	{
2993 	  HOST_WIDE_INT end = offset + width;
2994 	  for (i = offset; i < end; i++)
2995 	    {
2996 	      int index = get_bitmap_index (group_info, i);
2997 	      if (index != 0)
2998 		{
2999 		  bitmap_set_bit (gen, index);
3000 		  if (kill)
3001 		    bitmap_clear_bit (kill, index);
3002 		}
3003 	    }
3004 	}
3005       store_info = store_info->next;
3006     }
3007 }
3008 
3009 
3010 /* Process the READ_INFOs into the bitmaps into GEN and KILL.  KILL
3011    may be NULL.  */
3012 
3013 static void
3014 scan_reads (insn_info_t insn_info, bitmap gen, bitmap kill)
3015 {
3016   read_info_t read_info = insn_info->read_rec;
3017   int i;
3018   group_info *group;
3019 
3020   /* If this insn reads the frame, kill all the frame related stores.  */
3021   if (insn_info->frame_read)
3022     {
3023       FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
3024 	if (group->process_globally && group->frame_related)
3025 	  {
3026 	    if (kill)
3027 	      bitmap_ior_into (kill, group->group_kill);
3028 	    bitmap_and_compl_into (gen, group->group_kill);
3029 	  }
3030     }
3031   if (insn_info->non_frame_wild_read)
3032     {
3033       /* Kill all non-frame related stores.  Kill all stores of variables that
3034          escape.  */
3035       if (kill)
3036         bitmap_ior_into (kill, kill_on_calls);
3037       bitmap_and_compl_into (gen, kill_on_calls);
3038       FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
3039 	if (group->process_globally && !group->frame_related)
3040 	  {
3041 	    if (kill)
3042 	      bitmap_ior_into (kill, group->group_kill);
3043 	    bitmap_and_compl_into (gen, group->group_kill);
3044 	  }
3045     }
3046   while (read_info)
3047     {
3048       FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
3049 	{
3050 	  if (group->process_globally)
3051 	    {
3052 	      if (i == read_info->group_id)
3053 		{
3054 		  HOST_WIDE_INT offset, width;
3055 		  /* Reads with non-constant size kill all DSE opportunities
3056 		     in the group.  */
3057 		  if (!read_info->offset.is_constant (&offset)
3058 		      || !read_info->width.is_constant (&width)
3059 		      || !known_size_p (width))
3060 		    {
3061 		      /* Handle block mode reads.  */
3062 		      if (kill)
3063 			bitmap_ior_into (kill, group->group_kill);
3064 		      bitmap_and_compl_into (gen, group->group_kill);
3065 		    }
3066 		  else
3067 		    {
3068 		      /* The groups are the same, just process the
3069 			 offsets.  */
3070 		      HOST_WIDE_INT j;
3071 		      HOST_WIDE_INT end = offset + width;
3072 		      for (j = offset; j < end; j++)
3073 			{
3074 			  int index = get_bitmap_index (group, j);
3075 			  if (index != 0)
3076 			    {
3077 			      if (kill)
3078 				bitmap_set_bit (kill, index);
3079 			      bitmap_clear_bit (gen, index);
3080 			    }
3081 			}
3082 		    }
3083 		}
3084 	      else
3085 		{
3086 		  /* The groups are different, if the alias sets
3087 		     conflict, clear the entire group.  We only need
3088 		     to apply this test if the read_info is a cselib
3089 		     read.  Anything with a constant base cannot alias
3090 		     something else with a different constant
3091 		     base.  */
3092 		  if ((read_info->group_id < 0)
3093 		      && canon_true_dependence (group->base_mem,
3094 						GET_MODE (group->base_mem),
3095 						group->canon_base_addr,
3096 						read_info->mem, NULL_RTX))
3097 		    {
3098 		      if (kill)
3099 			bitmap_ior_into (kill, group->group_kill);
3100 		      bitmap_and_compl_into (gen, group->group_kill);
3101 		    }
3102 		}
3103 	    }
3104 	}
3105 
3106       read_info = read_info->next;
3107     }
3108 }
3109 
3110 
3111 /* Return the insn in BB_INFO before the first wild read or, if there
3112    are no wild reads in the block, the last insn.  */
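
/* For example (illustrative): in a block containing insns I1, I2, I3
   where I2 is the only wild read, the function below returns I1; if
   the first insn of the block is itself a wild read, it returns
   NULL.  */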
3113 
3114 static insn_info_t
3115 find_insn_before_first_wild_read (bb_info_t bb_info)
3116 {
3117   insn_info_t insn_info = bb_info->last_insn;
3118   insn_info_t last_wild_read = NULL;
3119 
3120   while (insn_info)
3121     {
3122       if (insn_info->wild_read)
3123 	{
3124 	  last_wild_read = insn_info->prev_insn;
3125 	  /* Block starts with wild read.  */
3126 	  if (!last_wild_read)
3127 	    return NULL;
3128 	}
3129 
3130       insn_info = insn_info->prev_insn;
3131     }
3132 
3133   if (last_wild_read)
3134     return last_wild_read;
3135   else
3136     return bb_info->last_insn;
3137 }
3138 
3139 
3140 /* Scan the insns in BB_INFO starting at PTR and going to the top of
3141    the block in order to build the gen and kill sets for the block.
3142    We start at PTR, which is either the last insn in the block or the
3143    insn just before the first wild read.  In the latter case we can
3144    skip the rest of the block because it just does not matter:
3145    anything that happens is hidden by the wild read.  */
3146 
3147 static void
3148 dse_step3_scan (basic_block bb)
3149 {
3150   bb_info_t bb_info = bb_table[bb->index];
3151   insn_info_t insn_info;
3152 
3153   insn_info = find_insn_before_first_wild_read (bb_info);
3154 
3155   /* In the spill case, or in the no_spill case when there is no wild
3156      read in the block, we will need a kill set.  */
3157   if (insn_info == bb_info->last_insn)
3158     {
3159       if (bb_info->kill)
3160 	bitmap_clear (bb_info->kill);
3161       else
3162 	bb_info->kill = BITMAP_ALLOC (&dse_bitmap_obstack);
3163     }
3164   else
3165     if (bb_info->kill)
3166       BITMAP_FREE (bb_info->kill);
3167 
3168   while (insn_info)
3169     {
3170       /* There may have been code deleted by the dce pass run before
3171 	 this phase.  */
3172       if (insn_info->insn && INSN_P (insn_info->insn))
3173 	{
3174 	  scan_stores (insn_info->store_rec, bb_info->gen, bb_info->kill);
3175 	  scan_reads (insn_info, bb_info->gen, bb_info->kill);
3176 	}
3177 
3178       insn_info = insn_info->prev_insn;
3179     }
3180 }
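
/* For example, scanning backwards through a block where a late insn
   stores bytes 0..3 of a group and an earlier insn reads byte 2:
   processing the store sets gen bits 0..3; processing the read then
   clears bit 2 from gen and sets it in kill, leaving gen = {0,1,3}
   and kill = {2} as the block's transfer function.  */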


/* Set the gen set of the exit block, and also any block with no
   successors that does not have a wild read.  */

static void
dse_step3_exit_block_scan (bb_info_t bb_info)
{
  /* The gen set is all 0's for the exit block except for the
     frame_pointer_group.  */

  if (stores_off_frame_dead_at_return)
    {
      unsigned int i;
      group_info *group;

      FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
	{
	  if (group->process_globally && group->frame_related)
	    bitmap_ior_into (bb_info->gen, group->group_kill);
	}
    }
}


/* Clear the bit in UNREACHABLE_BLOCKS for BB and for every block that
   is backwards reachable from it.  Once this has been called for each
   block with no successors, the blocks whose bits are still set in
   UNREACHABLE_BLOCKS are exactly those in infinite loops or infinite
   self loops, since they cannot reach the exit.  */

static void
mark_reachable_blocks (sbitmap unreachable_blocks, basic_block bb)
{
  edge e;
  edge_iterator ei;

  if (bitmap_bit_p (unreachable_blocks, bb->index))
    {
      bitmap_clear_bit (unreachable_blocks, bb->index);
      FOR_EACH_EDGE (e, ei, bb->preds)
	{
	  mark_reachable_blocks (unreachable_blocks, e->src);
	}
    }
}
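
/* For example, given blocks B1 -> B2 -> B1 forming a loop with no
   edge leaving it, neither B1 nor B2 is a direct or indirect
   predecessor of any block with no successors, so both keep their
   bits set in UNREACHABLE_BLOCKS and will receive the all-ones out
   set below.  */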

/* Build the transfer functions for the function.  */

static void
dse_step3 ()
{
  basic_block bb;
  sbitmap_iterator sbi;
  bitmap all_ones = NULL;
  unsigned int i;

  auto_sbitmap unreachable_blocks (last_basic_block_for_fn (cfun));
  bitmap_ones (unreachable_blocks);

  FOR_ALL_BB_FN (bb, cfun)
    {
      bb_info_t bb_info = bb_table[bb->index];
      if (bb_info->gen)
	bitmap_clear (bb_info->gen);
      else
	bb_info->gen = BITMAP_ALLOC (&dse_bitmap_obstack);

      if (bb->index == ENTRY_BLOCK)
	;
      else if (bb->index == EXIT_BLOCK)
	dse_step3_exit_block_scan (bb_info);
      else
	dse_step3_scan (bb);
      if (EDGE_COUNT (bb->succs) == 0)
	mark_reachable_blocks (unreachable_blocks, bb);

      /* If this is the second time dataflow is run, delete the old
	 sets.  */
      if (bb_info->in)
	BITMAP_FREE (bb_info->in);
      if (bb_info->out)
	BITMAP_FREE (bb_info->out);
    }

  /* For any block in an infinite loop, we must initialize the out set
     to all ones.  Such blocks are never reached backwards from the
     exit, so the solver may never seed their out sets, yet step 5
     reads the out set directly.  This could be expensive, but it
     almost never occurs in practice; it is, however, common in
     regression tests.  */
  EXECUTE_IF_SET_IN_BITMAP (unreachable_blocks, 0, i, sbi)
    {
      if (bitmap_bit_p (all_blocks, i))
	{
	  bb_info_t bb_info = bb_table[i];
	  if (!all_ones)
	    {
	      unsigned int j;
	      group_info *group;

	      all_ones = BITMAP_ALLOC (&dse_bitmap_obstack);
	      FOR_EACH_VEC_ELT (rtx_group_vec, j, group)
		bitmap_ior_into (all_ones, group->group_kill);
	    }
	  if (!bb_info->out)
	    {
	      bb_info->out = BITMAP_ALLOC (&dse_bitmap_obstack);
	      bitmap_copy (bb_info->out, all_ones);
	    }
	}
    }

  if (all_ones)
    BITMAP_FREE (all_ones);
}



/*----------------------------------------------------------------------------
   Fourth step.

   Solve the bitvector equations.
----------------------------------------------------------------------------*/
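
/* The equations being solved are, for each block BB (with a missing
   in, out or kill set standing for "all ones"):

     out(BB) = intersection over successors S of in(S)
     in(BB)  = gen(BB) | (out(BB) & ~kill(BB))

   seeded at the exit block (and blocks with no successors) by the gen
   set built in dse_step3_exit_block_scan.  A set bit at a program
   point means a store to that byte is dead there: on every path it is
   overwritten, or the function returns, before the byte is read.  */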


/* Confluence function for blocks with no successors.  Create an out
   set from the gen set of the exit block.  Such a block logically has
   the exit block as a successor.  */

static void
dse_confluence_0 (basic_block bb)
{
  bb_info_t bb_info = bb_table[bb->index];

  if (bb->index == EXIT_BLOCK)
    return;

  if (!bb_info->out)
    {
      bb_info->out = BITMAP_ALLOC (&dse_bitmap_obstack);
      bitmap_copy (bb_info->out, bb_table[EXIT_BLOCK]->gen);
    }
}

/* Propagate the information from the in set of the dest of E to the
   out set of the src of E.  If the various in or out sets are not
   there, that means they are all ones.  */

static bool
dse_confluence_n (edge e)
{
  bb_info_t src_info = bb_table[e->src->index];
  bb_info_t dest_info = bb_table[e->dest->index];

  if (dest_info->in)
    {
      if (src_info->out)
	bitmap_and_into (src_info->out, dest_info->in);
      else
	{
	  src_info->out = BITMAP_ALLOC (&dse_bitmap_obstack);
	  bitmap_copy (src_info->out, dest_info->in);
	}
    }
  return true;
}
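
/* For example, if a block has successors S1 with in = {1,3} and S2
   whose in set has not been computed yet (so it stands for all ones),
   only S1 contributes: out becomes {1,3}, and a later visit once S2's
   in set exists can only shrink it further.  */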


/* Propagate the info from the out to the in set of BB_INDEX's basic
   block.  There are three cases:

   1) The block has no kill set.  In this case the kill set is all
   ones.  It does not matter what the out set of the block is; none of
   the info can reach the top.  The only thing that reaches the top is
   the gen set and we just copy the set.

   2) There is a kill set but no out set and bb has successors.  In
   this case we just return.  Eventually an out set will be created
   and it is better to wait than to create a set of ones.

   3) There is both a kill and out set.  We apply the obvious transfer
   function.
*/

static bool
dse_transfer_function (int bb_index)
{
  bb_info_t bb_info = bb_table[bb_index];

  if (bb_info->kill)
    {
      if (bb_info->out)
	{
	  /* Case 3 above.  */
	  if (bb_info->in)
	    return bitmap_ior_and_compl (bb_info->in, bb_info->gen,
					 bb_info->out, bb_info->kill);
	  else
	    {
	      bb_info->in = BITMAP_ALLOC (&dse_bitmap_obstack);
	      bitmap_ior_and_compl (bb_info->in, bb_info->gen,
				    bb_info->out, bb_info->kill);
	      return true;
	    }
	}
      else
	/* Case 2 above.  */
	return false;
    }
  else
    {
      /* Case 1 above.  If there is already an in set, nothing
	 happens.  */
      if (bb_info->in)
	return false;
      else
	{
	  bb_info->in = BITMAP_ALLOC (&dse_bitmap_obstack);
	  bitmap_copy (bb_info->in, bb_info->gen);
	  return true;
	}
    }
}
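
/* Worked example of case 3: with gen = {0,1}, kill = {2} and
   out = {1,2,3}, bitmap_ior_and_compl computes
   in = gen | (out & ~kill) = {0,1} | {1,3} = {0,1,3}.  Its return
   value reports whether in changed, which tells the solver whether
   the block's predecessors need to be revisited.  */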

/* Solve the dataflow equations.  */

static void
dse_step4 (void)
{
  df_simple_dataflow (DF_BACKWARD, NULL, dse_confluence_0,
		      dse_confluence_n, dse_transfer_function,
		      all_blocks, df_get_postorder (DF_BACKWARD),
		      df_get_n_blocks (DF_BACKWARD));
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      basic_block bb;

      fprintf (dump_file, "\n\n*** Global dataflow info after analysis.\n");
      FOR_ALL_BB_FN (bb, cfun)
	{
	  bb_info_t bb_info = bb_table[bb->index];

	  df_print_bb_index (bb, dump_file);
	  if (bb_info->in)
	    bitmap_print (dump_file, bb_info->in, "  in:   ", "\n");
	  else
	    fprintf (dump_file, "  in:   *MISSING*\n");
	  if (bb_info->gen)
	    bitmap_print (dump_file, bb_info->gen, "  gen:  ", "\n");
	  else
	    fprintf (dump_file, "  gen:  *MISSING*\n");
	  if (bb_info->kill)
	    bitmap_print (dump_file, bb_info->kill, "  kill: ", "\n");
	  else
	    fprintf (dump_file, "  kill: *MISSING*\n");
	  if (bb_info->out)
	    bitmap_print (dump_file, bb_info->out, "  out:  ", "\n");
	  else
	    fprintf (dump_file, "  out:  *MISSING*\n\n");
	}
    }
}



/*----------------------------------------------------------------------------
   Fifth step.

   Delete the stores that can only be deleted using the global information.
----------------------------------------------------------------------------*/


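/* Walk each block backwards, maintaining in V the set of bytes known
   to be dead at the current point (seeded from the block's out set).
   A store can be deleted only if every byte it writes has its bit set
   in V at that point; for example, a 4-byte store at offset 0 of a
   group is removed only when the bits for bytes 0..3 (found via
   get_bitmap_index) are all set.  */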
static void
dse_step5 (void)
{
  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    {
      bb_info_t bb_info = bb_table[bb->index];
      insn_info_t insn_info = bb_info->last_insn;
      bitmap v = bb_info->out;

      while (insn_info)
	{
	  bool deleted = false;
	  if (dump_file && insn_info->insn)
	    {
	      fprintf (dump_file, "starting to process insn %d\n",
		       INSN_UID (insn_info->insn));
	      bitmap_print (dump_file, v, "  v:  ", "\n");
	    }

	  /* There may have been code deleted by the dce pass run before
	     this phase.  */
	  if (insn_info->insn
	      && INSN_P (insn_info->insn)
	      && (!insn_info->cannot_delete)
	      && (!bitmap_empty_p (v)))
	    {
	      store_info *store_info = insn_info->store_rec;

	      /* Try to delete the current insn.  */
	      deleted = true;

	      /* Skip the clobbers.  */
	      while (!store_info->is_set)
		store_info = store_info->next;

	      HOST_WIDE_INT i, offset, width;
	      group_info *group_info = rtx_group_vec[store_info->group_id];

	      if (!store_info->offset.is_constant (&offset)
		  || !store_info->width.is_constant (&width))
		deleted = false;
	      else
		{
		  HOST_WIDE_INT end = offset + width;
		  for (i = offset; i < end; i++)
		    {
		      int index = get_bitmap_index (group_info, i);

		      if (dump_file && (dump_flags & TDF_DETAILS))
			fprintf (dump_file, "i = %d, index = %d\n",
				 (int) i, index);
		      if (index == 0 || !bitmap_bit_p (v, index))
			{
			  if (dump_file && (dump_flags & TDF_DETAILS))
			    fprintf (dump_file, "failing at i = %d\n",
				     (int) i);
			  deleted = false;
			  break;
			}
		    }
		}
	      if (deleted)
		{
		  if (dbg_cnt (dse)
		      && check_for_inc_dec_1 (insn_info))
		    {
		      delete_insn (insn_info->insn);
		      insn_info->insn = NULL;
		      globally_deleted++;
		    }
		}
	    }
	  /* We do not want to process the local info if the insn was
	     deleted.  For instance, if the insn did a wild read, we
	     no longer need to trash the info.  */
	  if (insn_info->insn
	      && INSN_P (insn_info->insn)
	      && (!deleted))
	    {
	      scan_stores (insn_info->store_rec, v, NULL);
	      if (insn_info->wild_read)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "wild read\n");
		  bitmap_clear (v);
		}
	      else if (insn_info->read_rec
		       || insn_info->non_frame_wild_read
		       || insn_info->frame_read)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    {
		      if (!insn_info->non_frame_wild_read
			  && !insn_info->frame_read)
			fprintf (dump_file, "regular read\n");
		      if (insn_info->non_frame_wild_read)
			fprintf (dump_file, "non-frame wild read\n");
		      if (insn_info->frame_read)
			fprintf (dump_file, "frame read\n");
		    }
		  scan_reads (insn_info, v, NULL);
		}
	    }

	  insn_info = insn_info->prev_insn;
	}
    }
}



/*----------------------------------------------------------------------------
   Sixth step.

   Delete stores made redundant by earlier stores (which store the same
   value) that couldn't be eliminated.
----------------------------------------------------------------------------*/
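
/* For example, if insn 10 stores zero to a stack slot and could not
   itself be deleted, a later insn 20 that stores zero to the same
   slot is redundant: the earlier scan recorded insn 10 as insn 20's
   redundant_reason, and this step deletes insn 20.  */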

static void
dse_step6 (void)
{
  basic_block bb;

  FOR_ALL_BB_FN (bb, cfun)
    {
      bb_info_t bb_info = bb_table[bb->index];
      insn_info_t insn_info = bb_info->last_insn;

      while (insn_info)
	{
	  /* There may have been code deleted by the dce pass run before
	     this phase.  */
	  if (insn_info->insn
	      && INSN_P (insn_info->insn)
	      && !insn_info->cannot_delete)
	    {
	      store_info *s_info = insn_info->store_rec;

	      while (s_info && !s_info->is_set)
		s_info = s_info->next;
	      if (s_info
		  && s_info->redundant_reason
		  && s_info->redundant_reason->insn
		  && INSN_P (s_info->redundant_reason->insn))
		{
		  rtx_insn *rinsn = s_info->redundant_reason->insn;
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Locally deleting insn %d "
					"because insn %d stores the "
					"same value and couldn't be "
					"eliminated\n",
					INSN_UID (insn_info->insn),
					INSN_UID (rinsn));
		  delete_dead_store_insn (insn_info);
		}
	    }
	  insn_info = insn_info->prev_insn;
	}
    }
}

/*----------------------------------------------------------------------------
   Seventh step.

   Destroy everything left standing.
----------------------------------------------------------------------------*/

static void
dse_step7 (void)
{
  bitmap_obstack_release (&dse_bitmap_obstack);
  obstack_free (&dse_obstack, NULL);

  end_alias_analysis ();
  free (bb_table);
  delete rtx_group_table;
  rtx_group_table = NULL;
  rtx_group_vec.release ();
  BITMAP_FREE (all_blocks);
  BITMAP_FREE (scratch);

  rtx_store_info_pool.release ();
  read_info_type_pool.release ();
  insn_info_type_pool.release ();
  dse_bb_info_type_pool.release ();
  group_info_pool.release ();
  deferred_change_pool.release ();
}


/* -------------------------------------------------------------------------
   DSE
   ------------------------------------------------------------------------- */

/* Callback for running pass_rtl_dse.  */

static unsigned int
rest_of_handle_dse (void)
{
  df_set_flags (DF_DEFER_INSN_RESCAN);

  /* Need the notes since we must track live hardregs in the forwards
     direction.  */
  df_note_add_problem ();
  df_analyze ();

  dse_step0 ();
  dse_step1 ();
  dse_step2_init ();
  if (dse_step2 ())
    {
      df_set_flags (DF_LR_RUN_DCE);
      df_analyze ();
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "doing global processing\n");
      dse_step3 ();
      dse_step4 ();
      dse_step5 ();
    }

  dse_step6 ();
  dse_step7 ();

  if (dump_file)
    fprintf (dump_file, "dse: local deletions = %d, global deletions = %d\n",
	     locally_deleted, globally_deleted);

  /* DSE can eliminate potentially-trapping MEMs.
     Remove any EH edges associated with them.  */
  if ((locally_deleted || globally_deleted)
      && cfun->can_throw_non_call_exceptions
      && purge_all_dead_edges ())
    {
      free_dominance_info (CDI_DOMINATORS);
      cleanup_cfg (0);
    }

  return 0;
}

namespace {

const pass_data pass_data_rtl_dse1 =
{
  RTL_PASS, /* type */
  "dse1", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_DSE1, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_rtl_dse1 : public rtl_opt_pass
{
public:
  pass_rtl_dse1 (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_rtl_dse1, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return optimize > 0 && flag_dse && dbg_cnt (dse1);
    }

  virtual unsigned int execute (function *) { return rest_of_handle_dse (); }

}; // class pass_rtl_dse1

} // anon namespace

rtl_opt_pass *
make_pass_rtl_dse1 (gcc::context *ctxt)
{
  return new pass_rtl_dse1 (ctxt);
}

namespace {

const pass_data pass_data_rtl_dse2 =
{
  RTL_PASS, /* type */
  "dse2", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_DSE2, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_rtl_dse2 : public rtl_opt_pass
{
public:
  pass_rtl_dse2 (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_rtl_dse2, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return optimize > 0 && flag_dse && dbg_cnt (dse2);
    }

  virtual unsigned int execute (function *) { return rest_of_handle_dse (); }

}; // class pass_rtl_dse2

} // anon namespace

rtl_opt_pass *
make_pass_rtl_dse2 (gcc::context *ctxt)
{
  return new pass_rtl_dse2 (ctxt);
}