/* RTL dead store elimination.
   Copyright (C) 2005-2021 Free Software Foundation, Inc.

   Contributed by Richard Sandiford <rsandifor@codesourcery.com>
   and Kenneth Zadeck <zadeck@naturalbridge.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#undef BASELINE

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "df.h"
#include "memmodel.h"
#include "tm_p.h"
#include "gimple-ssa.h"
#include "expmed.h"
#include "optabs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "alias.h"
#include "stor-layout.h"
#include "cfgrtl.h"
#include "cselib.h"
#include "tree-pass.h"
#include "explow.h"
#include "expr.h"
#include "dbgcnt.h"
#include "rtl-iter.h"
#include "cfgcleanup.h"
#include "calls.h"
/* This file contains three techniques for performing Dead Store
   Elimination (dse).

   * The first technique performs dse locally on any base address.  It
   is based on cselib, which is a local value numbering technique.
   This technique is local to a basic block but deals with fairly
   general addresses.

   * The second technique performs dse globally but is restricted to
   base addresses that are either constant or are relative to the
   frame_pointer.

   * The third technique (which is only done after register allocation)
   processes the spill slots.  This differs from the second
   technique because it takes advantage of the fact that spilling is
   completely free from the effects of aliasing.

   Logically, dse is a backwards dataflow problem.  A store can be
   deleted if it cannot be reached in the backward direction by any
   use of the value being stored.  However, the local technique uses a
   forwards scan of the basic block because cselib requires that the
   block be processed in that order.
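
   For example, in the sequence

     (set (mem:SI (plus (reg fp) (const_int -4))) (reg 100))
     (set (mem:SI (plus (reg fp) (const_int -4))) (reg 101))

   the first store is dead provided nothing between the two insns
   reads the slot: walking backwards from any use of the slot, the
   second store is always reached before the first.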

   The pass is logically broken into 7 steps:

   0) Initialization.

   1) The local algorithm, as well as scanning the insns for the two
   global algorithms.

   2) Analysis to see if the global algs are necessary.  In the case
   of stores based on a constant address, there must be at least two
   stores to that address, to make it possible to delete some of the
   stores.  In the case of stores off of the frame or spill related
   stores, only one store to an address is necessary because those
   stores die at the end of the function.

   3) Set up the global dataflow equations based on processing the
   info parsed in the first step.

   4) Solve the dataflow equations.

   5) Delete the insns that the global analysis has indicated are
   unnecessary.

   6) Delete insns that store the same value as a preceding store,
   where the earlier store couldn't be eliminated.

   7) Cleanup.

   The first step uses cselib and canon_rtx to build the largest
   expression possible for each address.  That step is a forwards pass
   through each basic block.  From the point of view of the global
   technique, the first step could examine a block in either
   direction.  The forwards ordering is to accommodate cselib.

   We make a simplifying assumption: addresses fall into four broad
   categories:

   1) base has rtx_varies_p == false, offset is constant.
   2) base has rtx_varies_p == false, offset variable.
   3) base has rtx_varies_p == true, offset constant.
   4) base has rtx_varies_p == true, offset variable.

   The local passes are able to process all 4 kinds of addresses.  The
   global pass only handles 1).
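
   As an illustration, (mem (plus (reg fp) (const_int 8))) falls into
   category 1), (mem (plus (reg fp) (reg 100))) into category 2), and
   (mem (plus (reg 100) (const_int 8))) into category 3): the frame
   pointer is invariant for rtx_varies_p, while a pseudo is not.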

   The global problem is formulated as follows:

     A store, S1, to address A, where A is not relative to the stack
     frame, can be eliminated if all paths from S1 to the end of the
     function contain another store to A before a read to A.

     If the address A is relative to the stack frame, a store S2 to A
     can be eliminated if there are no paths from S2 that reach the
     end of the function that read A before another store to A.  In
     this case S2 can be deleted if there are paths from S2 to the
     end of the function that have no reads or writes to A.  This
     second case allows stores to the stack frame to be deleted that
     would otherwise die when the function returns.  This cannot be
     done if stores_off_frame_dead_at_return is not true.  See the
     documentation of that variable for the cases in which it is false.
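
     For example, a store to a local stack slot that is never read
     again on any path to the function's exit can be removed outright,
     because the slot ceases to exist once the frame is popped.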

     The global problem is formulated as a backwards set union
     dataflow problem where the stores are the gens and reads are the
     kills.  Set union problems are rare and require some special
     handling given our representation of bitmaps.  A straightforward
     implementation requires a lot of bitmaps filled with 1s.
     These are expensive and cumbersome in our bitmap formulation so
     care has been taken to avoid large vectors filled with 1s.  See
     the comments in bb_info and in the dataflow confluence functions
     for details.

   There are two places for further enhancements to this algorithm:

   1) The original dse which was embedded in a pass called flow also
   did local address forwarding.  For example in

   A <- r100
   ... <- A

   flow would replace the right hand side of the second insn with a
   reference to r100.  Most of the information is available to add this
   to this pass.  It has not been done because it is a lot of work in
   the case that either r100 is assigned to between the first and
   second insn and/or the second insn is a load of part of the value
   stored by the first insn.

   insn 5 in gcc.c-torture/compile/990203-1.c is a simple case.
   insn 15 in gcc.c-torture/execute/20001017-2.c is a simple case.
   insn 25 in gcc.c-torture/execute/20001026-1.c is a simple case.
   insn 44 in gcc.c-torture/execute/20010910-1.c is a simple case.

   2) The cleaning up of spill code is quite profitable.  It currently
   depends on reading tea leaves and chicken entrails left by reload.
   This pass depends on reload creating a singleton alias set for each
   spill slot and telling the next dse pass which of these alias sets
   are the singletons.  Rather than analyze the addresses of the
   spills, dse's spill processing just does analysis of the loads and
   stores that use those alias sets.  There are three cases where this
   falls short:

     a) Reload sometimes creates the slot for one mode of access, and
     then inserts loads and/or stores for a smaller mode.  In this
     case, the current code just punts on the slot.  The proper thing
     to do is to back out and use one bit vector position for each
     byte of the entity associated with the slot.  This depends on
     KNOWING that reload always generates the accesses for each of the
     bytes in some canonical (read: easy to understand several passes
     after reload happens) way.

     b) Reload sometimes decides that the spill slot it allocated was
     not large enough for the mode and goes back and allocates more
     slots with the same mode and alias set.  The backout in this case
     is a little more graceful than (a).  In this case the slot is
     unmarked as being a spill slot and if the final address turns out
     to be frame pointer based, the global algorithm handles the slot.

     c) For any pass that may prespill, there is currently no
     mechanism to tell the dse pass that the slot being used has the
     special properties that reload uses.  It may be that all that is
     required is to have those passes make the same calls that reload
     does, assuming that the alias sets can be manipulated in the same
     way.  */

/* There are limits to the size of constant offsets we model for the
   global problem.  There are certainly test cases that exceed this
   limit; however, it is unlikely that there are important programs
   that really have constant offsets this large.  */
#define MAX_OFFSET (64 * 1024)

/* Obstack for the DSE dataflow bitmaps.  We don't want to put these
   on the default obstack because these bitmaps can grow quite large
   (~2GB for the small (!) test case of PR54146) and we'll hold on to
   all that memory until the end of the compiler run.
   As a bonus, delete_tree_live_info can destroy all the bitmaps by just
   releasing the whole obstack.  */
static bitmap_obstack dse_bitmap_obstack;

/* Obstack for other data.  As for above: Kinda nice to be able to
   throw it all away at the end in one big sweep.  */
static struct obstack dse_obstack;

/* Scratch bitmap for cselib's cselib_expand_value_rtx.  */
static bitmap scratch = NULL;

struct insn_info_type;

/* This structure holds information about a candidate store.  */
class store_info
{
public:

  /* False means this is a clobber.  */
  bool is_set;

  /* False if a single HOST_WIDE_INT bitmap is used for positions_needed.  */
  bool is_large;

  /* The id of the mem group of the base address.  If rtx_varies_p is
     true, this is -1.  Otherwise, it is the index into the group
     table.  */
  int group_id;

  /* This is the cselib value.  */
  cselib_val *cse_base;

  /* The canonized mem.  */
  rtx mem;

  /* Canonized MEM address for use by canon_true_dependence.  */
  rtx mem_addr;

  /* The offset of the first byte associated with the operation.  */
  poly_int64 offset;

  /* The number of bytes covered by the operation.  This is always exact
     and known (rather than -1).  */
  poly_int64 width;

  union
    {
      /* A bitmask as wide as the number of bytes in the word that
	 contains a 1 if the byte may be needed.  The store is unused if
	 all of the bits are 0.  This is used if IS_LARGE is false.  */
      unsigned HOST_WIDE_INT small_bitmask;

      struct
	{
	  /* A bitmap with one bit per byte, or null if the number of
	     bytes isn't known at compile time.  A cleared bit means
	     the position is needed.  Used if IS_LARGE is true.  */
	  bitmap bmap;

	  /* When BITMAP is nonnull, this counts the number of set bits
	     (i.e. unneeded bytes) in the bitmap.  If it is equal to
	     WIDTH, the whole store is unused.

	     When BITMAP is null:
	     - the store is definitely not needed when COUNT == 1
	     - all the store is needed when COUNT == 0 and RHS is nonnull
	     - otherwise we don't know which parts of the store are needed.  */
	  int count;
	} large;
    } positions_needed;

  /* The next store info for this insn.  */
  class store_info *next;

  /* The right hand side of the store.  This is used if there is a
     subsequent reload of the mem's address somewhere later in the
     basic block.  */
  rtx rhs;

  /* If rhs is or holds a constant, this contains that constant,
     otherwise NULL.  */
  rtx const_rhs;

  /* Set if this store stores the same constant value as the store in
     REDUNDANT_REASON's insn.  These aren't eliminated early, because
     doing that might prevent the earlier larger store from being
     eliminated.  */
  struct insn_info_type *redundant_reason;
};

/* Return a bitmask with the first N low bits set.  */

static unsigned HOST_WIDE_INT
lowpart_bitmask (int n)
{
  unsigned HOST_WIDE_INT mask = HOST_WIDE_INT_M1U;
  return mask >> (HOST_BITS_PER_WIDE_INT - n);
}
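
/* For example, lowpart_bitmask (4) yields 0xf: the all-ones mask is
   shifted right by HOST_BITS_PER_WIDE_INT - 4, keeping exactly the
   low four bits.  N is assumed to be in [1, HOST_BITS_PER_WIDE_INT];
   N == 0 would make the shift count equal to the word width, which is
   undefined behavior.  */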

static object_allocator<store_info> cse_store_info_pool ("cse_store_info_pool");

static object_allocator<store_info> rtx_store_info_pool ("rtx_store_info_pool");

/* This structure holds information about a load.  These are only
   built for rtx bases.  */
class read_info_type
{
public:
  /* The id of the mem group of the base address.  */
  int group_id;

  /* The offset of the first byte associated with the operation.  */
  poly_int64 offset;

  /* The number of bytes covered by the operation, or -1 if not known.  */
  poly_int64 width;

  /* The mem being read.  */
  rtx mem;

  /* The next read_info for this insn.  */
  class read_info_type *next;
};
typedef class read_info_type *read_info_t;

static object_allocator<read_info_type> read_info_type_pool ("read_info_pool");

/* One of these records is created for each insn.  */

struct insn_info_type
{
  /* Set true if the insn contains a store but the insn itself cannot
     be deleted.  This is set if the insn is a parallel and there is
     more than one non-dead output or if the insn is in some way
     volatile.  */
  bool cannot_delete;

  /* This field is only used by the global algorithm.  It is set true
     if the insn contains any read of mem except for a kind 1)
     address.  This is also set if the insn is a call or has a clobber
     mem.  If the insn contains a wild read, the use_rec will be
     null.  */
  bool wild_read;

  /* This is true only for CALL instructions which could potentially read
     any non-frame memory location.  This field is used by the global
     algorithm.  */
  bool non_frame_wild_read;

  /* This field is only used for the processing of const functions.
     These functions cannot read memory, but they can read the stack
     because that is where they may get their parms.  We need to be
     this conservative because, like the store motion pass, we don't
     consider CALL_INSN_FUNCTION_USAGE when processing call insns.
     Moreover, we need to distinguish two cases:
     1. Before reload (register elimination), the stores related to
	outgoing arguments are stack pointer based and thus deemed
	of non-constant base in this pass.  This requires special
	handling but also means that the frame pointer based stores
	need not be killed upon encountering a const function call.
     2. After reload, the stores related to outgoing arguments can be
	either stack pointer or hard frame pointer based.  This means
	that we have no other choice than also killing all the frame
	pointer based stores upon encountering a const function call.
     This field is set after reload for const function calls and before
     reload for const tail function calls on targets where arg pointer
     is the frame pointer.  Having this set is less severe than a wild
     read; it just means that all the frame related stores are killed
     rather than all the stores.  */
  bool frame_read;

  /* This field is only used for the processing of const functions.
     It is set if the insn may contain a stack pointer based store.  */
  bool stack_pointer_based;

  /* This is true if any of the sets within the store contains a
     cselib base.  Such stores can only be deleted by the local
     algorithm.  */
  bool contains_cselib_groups;

  /* The insn.  */
  rtx_insn *insn;

  /* The list of mem sets or mem clobbers that are contained in this
     insn.  If the insn is deletable, it contains only one mem set.
     But it could also contain clobbers.  Insns that contain more than
     one mem set are not deletable, but each of those mems are here in
     order to provide info to delete other insns.  */
  store_info *store_rec;

  /* The linked list of mem uses in this insn.  Only the reads from
     rtx bases are listed here.  The reads to cselib bases are
     completely processed during the first scan and so are never
     created.  */
  read_info_t read_rec;

  /* The live fixed registers.  We assume only fixed registers can
     cause trouble by being clobbered from an expanded pattern;
     storing only the live fixed registers (rather than all registers)
     means less memory needs to be allocated / copied for the individual
     stores.  */
  regset fixed_regs_live;

  /* The prev insn in the basic block.  */
  struct insn_info_type * prev_insn;

  /* The linked list of insns that are in consideration for removal in
     the forwards pass through the basic block.  This pointer may be
     trash as it is not cleared when a wild read occurs.  The only
     time it is guaranteed to be correct is when the traversal starts
     at active_local_stores.  */
  struct insn_info_type * next_local_store;
};
typedef struct insn_info_type *insn_info_t;

static object_allocator<insn_info_type> insn_info_type_pool ("insn_info_pool");

/* The linked list of stores that are under consideration in this
   basic block.  */
static insn_info_t active_local_stores;
static int active_local_stores_len;

struct dse_bb_info_type
{
  /* Pointer to the insn info for the last insn in the block.  These
     are linked so this is how all of the insns are reached.  During
     scanning this is the current insn being scanned.  */
  insn_info_t last_insn;

  /* The info for the global dataflow problem.  */


  /* This is set if the transfer function should AND in the wild_read
     bitmap before applying the kill and gen sets.  That vector knocks
     out most of the bits in the bitmap and thus speeds up the
     operations.  */
  bool apply_wild_read;

  /* The following 4 bitvectors hold information about which positions
     of which stores are live or dead.  They are indexed by
     get_bitmap_index.  */

  /* The set of store positions that exist in this block before a wild read.  */
  bitmap gen;

  /* The set of load positions that exist in this block above the
     same position of a store.  */
  bitmap kill;

  /* The set of stores that reach the top of the block without being
     killed by a read.

     Do not represent the in if it is all ones.  Note that this is
     what the bitvector should logically be initialized to for a set
     intersection problem.  However, like the kill set, this is too
     expensive.  So initially, the in set will only be created for the
     exit block and any block that contains a wild read.  */
  bitmap in;

  /* The set of stores that reach the bottom of the block from its
     successors.

     Do not represent the out if it is all ones.  Note that this is
     what the bitvector should logically be initialized to for a set
     intersection problem.  However, like the kill and in sets, this is
     too expensive.  So what is done is that the confluence operator
     just initializes the vector from one of the out sets of the
     successors of the block.  */
  bitmap out;

  /* The following bitvector is indexed by the reg number.  It
     contains the set of regs that are live at the current instruction
     being processed.  While it contains info for all of the
     registers, only the hard registers are actually examined.  It is used
     to assure that shift and/or add sequences that are inserted do not
     accidentally clobber live hard regs.  */
  bitmap regs_live;
};

typedef struct dse_bb_info_type *bb_info_t;

static object_allocator<dse_bb_info_type> dse_bb_info_type_pool
  ("bb_info_pool");

/* Table to hold all bb_infos.  */
static bb_info_t *bb_table;

/* There is a group_info for each rtx base that is used to reference
   memory.  There are also not many rtx bases because they are very
   limited in scope.  */

struct group_info
{
  /* The actual base of the address.  */
  rtx rtx_base;

  /* The sequential id of the base.  This allows us to have a
     canonical ordering of these that is not based on addresses.  */
  int id;

  /* True if there are any positions that are to be processed
     globally.  */
  bool process_globally;

  /* True if the base of this group is either the frame_pointer or
     hard_frame_pointer.  */
  bool frame_related;

  /* A mem wrapped around the base pointer for the group in order to do
     read dependency.  It must be given BLKmode in order to encompass all
     the possible offsets from the base.  */
  rtx base_mem;

  /* Canonized version of base_mem's address.  */
  rtx canon_base_addr;

  /* These two sets of two bitmaps are used to keep track of how many
     stores are actually referencing that position from this base.  We
     only do this for rtx bases as this will be used to assign
     positions in the bitmaps for the global problem.  Bit N is set in
     store1 on the first store for offset N.  Bit N is set in store2
     for the second store to offset N.  This is all we need since we
     only care about offsets that have two or more stores for them.

     The "_n" suffix is for offsets less than 0 and the "_p" suffix is
     for 0 and greater offsets.

     There is one special case here, for stores into the stack frame:
     we will OR store1 into store2 before deciding which stores to
     look at globally.  This is because stores to the stack frame that
     have no other reads before the end of the function can also be
     deleted.  */
  bitmap store1_n, store1_p, store2_n, store2_p;

  /* These bitmaps keep track of which offsets in this group escape
     this function.  An offset escapes if it corresponds to a named
     variable whose addressable flag is set.  */
  bitmap escaped_n, escaped_p;

  /* The positions in this bitmap have the same assignments as the in,
     out, gen and kill bitmaps.  This bitmap is all zeros except for
     the positions that are occupied by stores for this group.  */
  bitmap group_kill;

  /* The offset_map is used to map the offsets from this base into
     positions in the global bitmaps.  It is only created after all of
     the stores have been scanned and we know which ones we care
     about.  */
  int *offset_map_n, *offset_map_p;
  int offset_map_size_n, offset_map_size_p;
};

static object_allocator<group_info> group_info_pool ("rtx_group_info_pool");

/* Index into the rtx_group_vec.  */
static int rtx_group_next_id;


static vec<group_info *> rtx_group_vec;


/* This structure holds the set of changes that are being deferred
   when removing a read operation.  See replace_read.  */
struct deferred_change
{

  /* The mem that is being replaced.  */
  rtx *loc;

  /* The reg it is being replaced with.  */
  rtx reg;

  struct deferred_change *next;
};

static object_allocator<deferred_change> deferred_change_pool
  ("deferred_change_pool");

static deferred_change *deferred_change_list = NULL;

/* This is true except if cfun->stdarg -- i.e. we cannot do
   this for vararg functions because they play games with the frame.  */
static bool stores_off_frame_dead_at_return;

/* Counters for stats.  */
static int globally_deleted;
static int locally_deleted;

static bitmap all_blocks;

/* Locations that are killed by calls in the global phase.  */
static bitmap kill_on_calls;

/* The number of bits used in the global bitmaps.  */
static unsigned int current_position;

/* Print offset range [OFFSET, OFFSET + WIDTH) to FILE.  */

static void
print_range (FILE *file, poly_int64 offset, poly_int64 width)
{
  fprintf (file, "[");
  print_dec (offset, file, SIGNED);
  fprintf (file, "..");
  print_dec (offset + width, file, SIGNED);
  fprintf (file, ")");
}
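
/* For example, print_range renders a 4-byte store at offset 8 as
   "[8..12)".  */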

/*----------------------------------------------------------------------------
   Zeroth step.

   Initialization.
----------------------------------------------------------------------------*/


/* Hashtable callbacks for maintaining the "bases" field of
   store_group_info, given that the addresses are function invariants.  */

struct invariant_group_base_hasher : nofree_ptr_hash <group_info>
{
  static inline hashval_t hash (const group_info *);
  static inline bool equal (const group_info *, const group_info *);
};

inline bool
invariant_group_base_hasher::equal (const group_info *gi1,
				    const group_info *gi2)
{
  return rtx_equal_p (gi1->rtx_base, gi2->rtx_base);
}

inline hashval_t
invariant_group_base_hasher::hash (const group_info *gi)
{
  int do_not_record;
  return hash_rtx (gi->rtx_base, Pmode, &do_not_record, NULL, false);
}

/* Tables of group_info structures, hashed by base value.  */
static hash_table<invariant_group_base_hasher> *rtx_group_table;


/* Get the GROUP for BASE.  Add a new group if it is not there.  */

static group_info *
get_group_info (rtx base)
{
  struct group_info tmp_gi;
  group_info *gi;
  group_info **slot;

  gcc_assert (base != NULL_RTX);

  /* Find the store_base_info structure for BASE, creating a new one
     if necessary.  */
  tmp_gi.rtx_base = base;
  slot = rtx_group_table->find_slot (&tmp_gi, INSERT);
  gi = *slot;

  if (gi == NULL)
    {
      *slot = gi = group_info_pool.allocate ();
      gi->rtx_base = base;
      gi->id = rtx_group_next_id++;
      gi->base_mem = gen_rtx_MEM (BLKmode, base);
      gi->canon_base_addr = canon_rtx (base);
      gi->store1_n = BITMAP_ALLOC (&dse_bitmap_obstack);
      gi->store1_p = BITMAP_ALLOC (&dse_bitmap_obstack);
      gi->store2_n = BITMAP_ALLOC (&dse_bitmap_obstack);
      gi->store2_p = BITMAP_ALLOC (&dse_bitmap_obstack);
      gi->escaped_p = BITMAP_ALLOC (&dse_bitmap_obstack);
      gi->escaped_n = BITMAP_ALLOC (&dse_bitmap_obstack);
      gi->group_kill = BITMAP_ALLOC (&dse_bitmap_obstack);
      gi->process_globally = false;
      gi->frame_related =
	(base == frame_pointer_rtx) || (base == hard_frame_pointer_rtx);
      gi->offset_map_size_n = 0;
      gi->offset_map_size_p = 0;
      gi->offset_map_n = NULL;
      gi->offset_map_p = NULL;
      rtx_group_vec.safe_push (gi);
    }

  return gi;
}
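
/* For example, the first call to get_group_info with BASE ==
   frame_pointer_rtx allocates a new frame-related group; every later
   store whose canonical base is the frame pointer maps back to that
   same group and id.  */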


/* Initialization of data structures.  */

static void
dse_step0 (void)
{
  locally_deleted = 0;
  globally_deleted = 0;

  bitmap_obstack_initialize (&dse_bitmap_obstack);
  gcc_obstack_init (&dse_obstack);

  scratch = BITMAP_ALLOC (&reg_obstack);
  kill_on_calls = BITMAP_ALLOC (&dse_bitmap_obstack);


  rtx_group_table = new hash_table<invariant_group_base_hasher> (11);

  bb_table = XNEWVEC (bb_info_t, last_basic_block_for_fn (cfun));
  rtx_group_next_id = 0;

  stores_off_frame_dead_at_return = !cfun->stdarg;

  init_alias_analysis ();
}



/*----------------------------------------------------------------------------
   First step.

   Scan all of the insns.  Any random ordering of the blocks is fine.
   Each block is scanned in forward order to accommodate cselib which
   is used to remove stores with non-constant bases.
----------------------------------------------------------------------------*/

/* Delete all of the store_info recs from INSN_INFO.  */

static void
free_store_info (insn_info_t insn_info)
{
  store_info *cur = insn_info->store_rec;
  while (cur)
    {
      store_info *next = cur->next;
      if (cur->is_large)
	BITMAP_FREE (cur->positions_needed.large.bmap);
      if (cur->cse_base)
	cse_store_info_pool.remove (cur);
      else
	rtx_store_info_pool.remove (cur);
      cur = next;
    }

  insn_info->cannot_delete = true;
  insn_info->contains_cselib_groups = false;
  insn_info->store_rec = NULL;
}

struct note_add_store_info
{
  rtx_insn *first, *current;
  regset fixed_regs_live;
  bool failure;
};

/* Callback for emit_inc_dec_insn_before via note_stores.
   Check if a register is clobbered that is live afterwards.  */

static void
note_add_store (rtx loc, const_rtx expr ATTRIBUTE_UNUSED, void *data)
{
  rtx_insn *insn;
  note_add_store_info *info = (note_add_store_info *) data;

  if (!REG_P (loc))
    return;

  /* If this register is referenced by the current or an earlier insn,
     that's OK.  E.g. this applies to the register that is being incremented
     with this addition.  */
  for (insn = info->first;
       insn != NEXT_INSN (info->current);
       insn = NEXT_INSN (insn))
    if (reg_referenced_p (loc, PATTERN (insn)))
      return;

  /* If we come here, we have a clobber of a register that's only OK
     if that register is not live.  If we don't have liveness information
     available, fail now.  */
  if (!info->fixed_regs_live)
    {
      info->failure = true;
      return;
    }
  /* Now check if this is a live fixed register.  */
  unsigned int end_regno = END_REGNO (loc);
  for (unsigned int regno = REGNO (loc); regno < end_regno; ++regno)
    if (REGNO_REG_SET_P (info->fixed_regs_live, regno))
      info->failure = true;
}

/* Callback for for_each_inc_dec that emits an INSN that sets DEST to
   SRC + SRCOFF before insn ARG.  */

static int
emit_inc_dec_insn_before (rtx mem ATTRIBUTE_UNUSED,
			  rtx op ATTRIBUTE_UNUSED,
			  rtx dest, rtx src, rtx srcoff, void *arg)
{
  insn_info_t insn_info = (insn_info_t) arg;
  rtx_insn *insn = insn_info->insn, *new_insn, *cur;
  note_add_store_info info;

  /* We can reuse all operands without copying, because we are about
     to delete the insn that contained it.  */
  if (srcoff)
    {
      start_sequence ();
      emit_insn (gen_add3_insn (dest, src, srcoff));
      new_insn = get_insns ();
      end_sequence ();
    }
  else
    new_insn = gen_move_insn (dest, src);
  info.first = new_insn;
  info.fixed_regs_live = insn_info->fixed_regs_live;
  info.failure = false;
  for (cur = new_insn; cur; cur = NEXT_INSN (cur))
    {
      info.current = cur;
      note_stores (cur, note_add_store, &info);
    }

  /* If a failure was flagged above, return 1 so that for_each_inc_dec will
     return it immediately, communicating the failure to its caller.  */
  if (info.failure)
    return 1;

  emit_insn_before (new_insn, insn);

  return 0;
}
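
/* For example, if the insn being deleted stored through
   (mem:SI (post_inc (reg R))), the increment of R by 4 must survive
   the deletion, so a replacement insn (set (reg R) (plus (reg R)
   (const_int 4))) is emitted before the dead store is removed.  */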

/* Before we delete INSN_INFO->INSN, make sure that the auto inc/dec, if it
   is there, is split into a separate insn.
   Return true on success (or if there was nothing to do), false on failure.  */

static bool
check_for_inc_dec_1 (insn_info_t insn_info)
{
  rtx_insn *insn = insn_info->insn;
  rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
  if (note)
    return for_each_inc_dec (PATTERN (insn), emit_inc_dec_insn_before,
			     insn_info) == 0;

  /* Punt on stack pushes, those don't have REG_INC notes and we are
     unprepared to deal with distribution of REG_ARGS_SIZE notes etc.  */
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
    {
      const_rtx x = *iter;
      if (GET_RTX_CLASS (GET_CODE (x)) == RTX_AUTOINC)
	return false;
    }

  return true;
}


/* Entry point for postreload.  If you work on reload_cse, or you need this
   anywhere else, consider if you can provide register liveness information
   and add a parameter to this function so that it can be passed down in
   insn_info.fixed_regs_live.  */
bool
check_for_inc_dec (rtx_insn *insn)
{
  insn_info_type insn_info;
  rtx note;

  insn_info.insn = insn;
  insn_info.fixed_regs_live = NULL;
  note = find_reg_note (insn, REG_INC, NULL_RTX);
  if (note)
    return for_each_inc_dec (PATTERN (insn), emit_inc_dec_insn_before,
			     &insn_info) == 0;

  /* Punt on stack pushes, those don't have REG_INC notes and we are
     unprepared to deal with distribution of REG_ARGS_SIZE notes etc.  */
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
    {
      const_rtx x = *iter;
      if (GET_RTX_CLASS (GET_CODE (x)) == RTX_AUTOINC)
	return false;
    }

  return true;
}

/* Delete the insn and free all of the fields inside INSN_INFO.  */

static void
delete_dead_store_insn (insn_info_t insn_info)
{
  read_info_t read_info;

  if (!dbg_cnt (dse))
    return;

  if (!check_for_inc_dec_1 (insn_info))
    return;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Locally deleting insn %d\n",
	     INSN_UID (insn_info->insn));

  free_store_info (insn_info);
  read_info = insn_info->read_rec;

  while (read_info)
    {
      read_info_t next = read_info->next;
      read_info_type_pool.remove (read_info);
      read_info = next;
    }
  insn_info->read_rec = NULL;

  delete_insn (insn_info->insn);
  locally_deleted++;
  insn_info->insn = NULL;

  insn_info->wild_read = false;
}

/* Return whether DECL, a local variable, can possibly escape the current
   function scope.  */

static bool
local_variable_can_escape (tree decl)
{
  if (TREE_ADDRESSABLE (decl))
    return true;

  /* If this is a partitioned variable, we need to consider all the variables
     in the partition.  This is necessary because a store into one of them can
     be replaced with a store into another and this may not change the outcome
     of the escape analysis.  */
  if (cfun->gimple_df->decls_to_pointers != NULL)
    {
      tree *namep = cfun->gimple_df->decls_to_pointers->get (decl);
      if (namep)
	return TREE_ADDRESSABLE (*namep);
    }

  return false;
}

/* Return whether EXPR can possibly escape the current function scope.  */

static bool
can_escape (tree expr)
{
  tree base;
  if (!expr)
    return true;
  base = get_base_address (expr);
  if (DECL_P (base)
      && !may_be_aliased (base)
      && !(VAR_P (base)
	   && !DECL_EXTERNAL (base)
	   && !TREE_STATIC (base)
	   && local_variable_can_escape (base)))
    return false;
  return true;
}

/* Set the store* bitmaps and the offset_map_size* fields in GROUP
   based on OFFSET and WIDTH.  */

static void
set_usage_bits (group_info *group, poly_int64 offset, poly_int64 width,
                tree expr)
{
  /* Non-constant offsets and widths act as global kills, so there's no point
     trying to use them to derive global DSE candidates.  */
  HOST_WIDE_INT i, const_offset, const_width;
  bool expr_escapes = can_escape (expr);
  if (offset.is_constant (&const_offset)
      && width.is_constant (&const_width)
      && const_offset > -MAX_OFFSET
      && const_offset + const_width < MAX_OFFSET)
    for (i = const_offset; i < const_offset + const_width; ++i)
      {
	bitmap store1;
	bitmap store2;
        bitmap escaped;
	int ai;
	if (i < 0)
	  {
	    store1 = group->store1_n;
	    store2 = group->store2_n;
	    escaped = group->escaped_n;
	    ai = -i;
	  }
	else
	  {
	    store1 = group->store1_p;
	    store2 = group->store2_p;
	    escaped = group->escaped_p;
	    ai = i;
	  }

	if (!bitmap_set_bit (store1, ai))
	  bitmap_set_bit (store2, ai);
	else
	  {
	    if (i < 0)
	      {
		if (group->offset_map_size_n < ai)
		  group->offset_map_size_n = ai;
	      }
	    else
	      {
		if (group->offset_map_size_p < ai)
		  group->offset_map_size_p = ai;
	      }
	  }
        if (expr_escapes)
          bitmap_set_bit (escaped, ai);
      }
}
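
/* For example, the first 4-byte store at offset 4 in a group makes
   set_usage_bits set bits 4..7 of store1_p (bitmap_set_bit returns
   true for a newly set bit); a second store covering any of those
   bytes then moves the shared bits into store2_p, marking them as
   having two or more stores.  */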

static void
reset_active_stores (void)
{
  active_local_stores = NULL;
  active_local_stores_len = 0;
}

/* Free all READ_REC of the LAST_INSN of BB_INFO.  */

static void
free_read_records (bb_info_t bb_info)
{
  insn_info_t insn_info = bb_info->last_insn;
  read_info_t *ptr = &insn_info->read_rec;
  while (*ptr)
    {
      read_info_t next = (*ptr)->next;
      read_info_type_pool.remove (*ptr);
      *ptr = next;
    }
}

/* Set the BB_INFO so that the last insn is marked as a wild read.  */

static void
add_wild_read (bb_info_t bb_info)
{
  insn_info_t insn_info = bb_info->last_insn;
  insn_info->wild_read = true;
  free_read_records (bb_info);
  reset_active_stores ();
}

/* Set the BB_INFO so that the last insn is marked as a wild read of
   non-frame locations.  */

static void
add_non_frame_wild_read (bb_info_t bb_info)
{
  insn_info_t insn_info = bb_info->last_insn;
  insn_info->non_frame_wild_read = true;
  free_read_records (bb_info);
  reset_active_stores ();
}

/* Return true if X is a constant or one of the registers that behave
   as a constant over the life of a function.  This is equivalent to
   !rtx_varies_p for memory addresses.  */

static bool
const_or_frame_p (rtx x)
{
  if (CONSTANT_P (x))
    return true;

  if (GET_CODE (x) == REG)
    {
      /* Note that we have to test for the actual rtx used for the frame
	 and arg pointers and not just the register number in case we have
	 eliminated the frame and/or arg pointer and are using it
	 for pseudos.  */
      if (x == frame_pointer_rtx || x == hard_frame_pointer_rtx
	  /* The arg pointer varies if it is not a fixed register.  */
	  || (x == arg_pointer_rtx && fixed_regs[ARG_POINTER_REGNUM])
	  || x == pic_offset_table_rtx)
	return true;
      return false;
    }

  return false;
}
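
/* For example, (symbol_ref "x") is constant and frame_pointer_rtx is
   invariant over the life of the function, so both qualify; a pseudo
   such as (reg 100) does not, and addresses based on it can only be
   handled by the local technique.  */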

/* Take all reasonable action to put the address of MEM into the form
   that we can do analysis on.

   The gold standard is to get the address into the form: address +
   OFFSET where address is something that rtx_varies_p considers a
   constant.  When we can get the address in this form, we can do
   global analysis on it.  Note that for constant bases, address is
   not actually returned, only the group_id.  The address can be
   obtained from that.

   If that fails, we try cselib to get a value we can at least use
   locally.  If that fails we return false.

   The GROUP_ID is set to -1 for cselib bases and to the index of the
   group for non-varying bases.  */

static bool
canon_address (rtx mem,
	       int *group_id,
	       poly_int64 *offset,
	       cselib_val **base)
{
  machine_mode address_mode = get_address_mode (mem);
  rtx mem_address = XEXP (mem, 0);
  rtx expanded_address, address;
  int expanded;

  cselib_lookup (mem_address, address_mode, 1, GET_MODE (mem));

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  mem: ");
      print_inline_rtx (dump_file, mem_address, 0);
      fprintf (dump_file, "\n");
    }

  /* First see if just canon_rtx (mem_address) is const or frame;
     if not, try cselib_expand_value_rtx and call canon_rtx on that.  */
  address = NULL_RTX;
  for (expanded = 0; expanded < 2; expanded++)
    {
      if (expanded)
	{
	  /* Use cselib to replace all of the reg references with the full
	     expression.  This will take care of the case where we have

	     r_x = base + offset;
	     val = *r_x;

	     by making it into

	     val = *(base + offset);  */

	  expanded_address = cselib_expand_value_rtx (mem_address,
						      scratch, 5);

	  /* If this fails, just go with the address from first
	     iteration.  */
	  if (!expanded_address)
	    break;
	}
      else
	expanded_address = mem_address;

      /* Split the address into canonical BASE + OFFSET terms.  */
      address = canon_rtx (expanded_address);

      *offset = 0;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  if (expanded)
	    {
	      fprintf (dump_file, "\n   after cselib_expand address: ");
	      print_inline_rtx (dump_file, expanded_address, 0);
	      fprintf (dump_file, "\n");
	    }

	  fprintf (dump_file, "\n   after canon_rtx address: ");
	  print_inline_rtx (dump_file, address, 0);
	  fprintf (dump_file, "\n");
	}

      if (GET_CODE (address) == CONST)
	address = XEXP (address, 0);

      address = strip_offset_and_add (address, offset);

      if (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (mem))
	  && const_or_frame_p (address))
	{
	  group_info *group = get_group_info (address);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "  gid=%d offset=", group->id);
	      print_dec (*offset, dump_file);
	      fprintf (dump_file, "\n");
	    }
	  *base = NULL;
	  *group_id = group->id;
	  return true;
	}
    }

  *base = cselib_lookup (address, address_mode, true, GET_MODE (mem));
  *group_id = -1;

  if (*base == NULL)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " no cselib val - should be a wild read.\n");
      return false;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  varying cselib base=%u:%u offset = ",
	       (*base)->uid, (*base)->hash);
      print_dec (*offset, dump_file);
      fprintf (dump_file, "\n");
    }
  return true;
}


/* Clear the rhs field from the active_local_stores array.  */

static void
clear_rhs_from_active_local_stores (void)
{
  insn_info_t ptr = active_local_stores;

  while (ptr)
    {
      store_info *store_info = ptr->store_rec;
      /* Skip the clobbers.  */
      while (!store_info->is_set)
	store_info = store_info->next;

      store_info->rhs = NULL;
      store_info->const_rhs = NULL;

      ptr = ptr->next_local_store;
    }
}


/* Mark the byte at offset POS from the beginning of store S_INFO as
   unneeded.  */

static inline void
set_position_unneeded (store_info *s_info, int pos)
{
  if (__builtin_expect (s_info->is_large, false))
    {
      if (bitmap_set_bit (s_info->positions_needed.large.bmap, pos))
	s_info->positions_needed.large.count++;
    }
  else
    s_info->positions_needed.small_bitmask
      &= ~(HOST_WIDE_INT_1U << pos);
}
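
/* For example, for a 4-byte store tracked in the small bitmask as 0xf,
   set_position_unneeded (s_info, 2) leaves 0xb: byte 2 is known to be
   overwritten by a later store, so only bytes 0, 1 and 3 remain
   needed.  */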

/* Mark the whole store S_INFO as unneeded.  */

static inline void
set_all_positions_unneeded (store_info *s_info)
{
  if (__builtin_expect (s_info->is_large, false))
    {
      HOST_WIDE_INT width;
      if (s_info->width.is_constant (&width))
	{
	  bitmap_set_range (s_info->positions_needed.large.bmap, 0, width);
	  s_info->positions_needed.large.count = width;
	}
      else
	{
	  gcc_checking_assert (!s_info->positions_needed.large.bmap);
	  s_info->positions_needed.large.count = 1;
	}
    }
  else
    s_info->positions_needed.small_bitmask = HOST_WIDE_INT_0U;
}

/* Return TRUE if any bytes from S_INFO store are needed.  */

static inline bool
any_positions_needed_p (store_info *s_info)
{
  if (__builtin_expect (s_info->is_large, false))
    {
      HOST_WIDE_INT width;
      if (s_info->width.is_constant (&width))
	{
	  gcc_checking_assert (s_info->positions_needed.large.bmap);
	  return s_info->positions_needed.large.count < width;
	}
      else
	{
	  gcc_checking_assert (!s_info->positions_needed.large.bmap);
	  return s_info->positions_needed.large.count == 0;
	}
    }
  else
    return (s_info->positions_needed.small_bitmask != HOST_WIDE_INT_0U);
}

/* Return TRUE if all bytes START through START+WIDTH-1 from S_INFO
   store are known to be needed.  */

static inline bool
all_positions_needed_p (store_info *s_info, poly_int64 start,
			poly_int64 width)
{
  gcc_assert (s_info->rhs);
  if (!s_info->width.is_constant ())
    {
      gcc_assert (s_info->is_large
		  && !s_info->positions_needed.large.bmap);
      return s_info->positions_needed.large.count == 0;
    }

  /* Otherwise, if START and WIDTH are non-constant, we're asking about
     a non-constant region of a constant-sized store.  We can't say for
     sure that all positions are needed.  */
  HOST_WIDE_INT const_start, const_width;
  if (!start.is_constant (&const_start)
      || !width.is_constant (&const_width))
    return false;

  if (__builtin_expect (s_info->is_large, false))
    {
      for (HOST_WIDE_INT i = const_start; i < const_start + const_width; ++i)
	if (bitmap_bit_p (s_info->positions_needed.large.bmap, i))
	  return false;
      return true;
    }
  else
    {
      unsigned HOST_WIDE_INT mask
	= lowpart_bitmask (const_width) << const_start;
      return (s_info->positions_needed.small_bitmask & mask) == mask;
    }
}
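
/* For example, with a constant START of 1 and WIDTH of 2 the mask is
   lowpart_bitmask (2) << 1 == 0b110, and the range is fully needed iff
   both of those bits are still set in small_bitmask.  */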


static rtx get_stored_val (store_info *, machine_mode, poly_int64,
			   poly_int64, basic_block, bool);


/* BODY is an instruction pattern that belongs to INSN.  Return 1 if
   there is a candidate store, after adding it to the appropriate
   local store group if so.  */

static int
record_store (rtx body, bb_info_t bb_info)
{
  rtx mem, rhs, const_rhs, mem_addr;
  poly_int64 offset = 0;
  poly_int64 width = 0;
  insn_info_t insn_info = bb_info->last_insn;
  store_info *store_info = NULL;
  int group_id;
  cselib_val *base = NULL;
  insn_info_t ptr, last, redundant_reason;
  bool store_is_unused;

  if (GET_CODE (body) != SET && GET_CODE (body) != CLOBBER)
    return 0;

  mem = SET_DEST (body);

  /* If this is not used, then this cannot be used to keep the insn
     from being deleted.  On the other hand, it does provide something
     that can be used to prove that another store is dead.  */
  store_is_unused
    = (find_reg_note (insn_info->insn, REG_UNUSED, mem) != NULL);

  /* Check whether that value is a suitable memory location.  */
  if (!MEM_P (mem))
    {
      /* If the set or clobber is unused, then it does not affect our
	 ability to get rid of the entire insn.  */
      if (!store_is_unused)
	insn_info->cannot_delete = true;
      return 0;
    }

  /* At this point we know mem is a mem.  */
  if (GET_MODE (mem) == BLKmode)
    {
      HOST_WIDE_INT const_size;
      if (GET_CODE (XEXP (mem, 0)) == SCRATCH)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " adding wild read for (clobber (mem:BLK (scratch)))\n");
	  add_wild_read (bb_info);
	  insn_info->cannot_delete = true;
	  return 0;
	}
      /* Handle (set (mem:BLK (addr) [... S36 ...]) (const_int 0))
	 as memset (addr, 0, 36);  */
      else if (!MEM_SIZE_KNOWN_P (mem)
	       || maybe_le (MEM_SIZE (mem), 0)
	       /* This is a limit on the bitmap size, which is only relevant
		  for constant-sized MEMs.  */
	       || (MEM_SIZE (mem).is_constant (&const_size)
		   && const_size > MAX_OFFSET)
	       || GET_CODE (body) != SET
	       || !CONST_INT_P (SET_SRC (body)))
	{
	  if (!store_is_unused)
	    {
	      /* If the set or clobber is unused, then it does not affect our
		 ability to get rid of the entire insn.  */
	      insn_info->cannot_delete = true;
	      clear_rhs_from_active_local_stores ();
	    }
	  return 0;
	}
    }

  /* We can still process a volatile mem; we just cannot delete it.  */
  if (MEM_VOLATILE_P (mem))
    insn_info->cannot_delete = true;

  if (!canon_address (mem, &group_id, &offset, &base))
    {
      clear_rhs_from_active_local_stores ();
      return 0;
    }

  if (GET_MODE (mem) == BLKmode)
    width = MEM_SIZE (mem);
  else
    width = GET_MODE_SIZE (GET_MODE (mem));

  if (!endpoint_representable_p (offset, width))
    {
      clear_rhs_from_active_local_stores ();
      return 0;
    }

  if (known_eq (width, 0))
    return 0;

  if (group_id >= 0)
    {
      /* In the restrictive case where the base is a constant or the
	 frame pointer, we can do global analysis.  */

      group_info *group
	= rtx_group_vec[group_id];
      tree expr = MEM_EXPR (mem);

      store_info = rtx_store_info_pool.allocate ();
      set_usage_bits (group, offset, width, expr);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " processing const base store gid=%d",
		   group_id);
	  print_range (dump_file, offset, width);
	  fprintf (dump_file, "\n");
	}
    }
  else
    {
      if (may_be_sp_based_p (XEXP (mem, 0)))
	insn_info->stack_pointer_based = true;
      insn_info->contains_cselib_groups = true;

      store_info = cse_store_info_pool.allocate ();
      group_id = -1;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " processing cselib store ");
	  print_range (dump_file, offset, width);
	  fprintf (dump_file, "\n");
	}
    }

  const_rhs = rhs = NULL_RTX;
  if (GET_CODE (body) == SET
      /* No place to keep the value after register allocation.  */
      && !reload_completed
      && (REG_P (SET_SRC (body))
	  || GET_CODE (SET_SRC (body)) == SUBREG
	  || CONSTANT_P (SET_SRC (body)))
      && !MEM_VOLATILE_P (mem)
      /* Sometimes the store and reload are used for truncation and
	 rounding.  */
      && !(FLOAT_MODE_P (GET_MODE (mem)) && (flag_float_store)))
    {
      rhs = SET_SRC (body);
      if (CONSTANT_P (rhs))
	const_rhs = rhs;
      else if (body == PATTERN (insn_info->insn))
	{
	  rtx tem = find_reg_note (insn_info->insn, REG_EQUAL, NULL_RTX);
	  if (tem && CONSTANT_P (XEXP (tem, 0)))
	    const_rhs = XEXP (tem, 0);
	}
      if (const_rhs == NULL_RTX && REG_P (rhs))
	{
	  rtx tem = cselib_expand_value_rtx (rhs, scratch, 5);

	  if (tem && CONSTANT_P (tem))
	    const_rhs = tem;
	}
    }

  /* Check to see if this store causes some other stores to be
     dead.  */
  ptr = active_local_stores;
  last = NULL;
  redundant_reason = NULL;
  mem = canon_rtx (mem);

  if (group_id < 0)
    mem_addr = base->val_rtx;
  else
    {
      group_info *group = rtx_group_vec[group_id];
      mem_addr = group->canon_base_addr;
    }
  if (maybe_ne (offset, 0))
    mem_addr = plus_constant (get_address_mode (mem), mem_addr, offset);
1530 
1531   while (ptr)
1532     {
1533       insn_info_t next = ptr->next_local_store;
1534       class store_info *s_info = ptr->store_rec;
1535       bool del = true;
1536 
1537       /* Skip the clobbers. We delete the active insn if this insn
1538 	 shadows the set.  To have been put on the active list, it
1539 	 has exactly on set. */
1540       while (!s_info->is_set)
1541 	s_info = s_info->next;
1542 
1543       if (s_info->group_id == group_id && s_info->cse_base == base)
1544 	{
1545 	  HOST_WIDE_INT i;
1546 	  if (dump_file && (dump_flags & TDF_DETAILS))
1547 	    {
1548 	      fprintf (dump_file, "    trying store in insn=%d gid=%d",
1549 		       INSN_UID (ptr->insn), s_info->group_id);
1550 	      print_range (dump_file, s_info->offset, s_info->width);
1551 	      fprintf (dump_file, "\n");
1552 	    }
1553 
1554 	  /* Even if PTR won't be eliminated as unneeded, if both
1555 	     PTR and this insn store the same constant value, we might
1556 	     eliminate this insn instead.  */
1557 	  if (s_info->const_rhs
1558 	      && const_rhs
1559 	      && known_subrange_p (offset, width,
1560 				   s_info->offset, s_info->width)
1561 	      && all_positions_needed_p (s_info, offset - s_info->offset,
1562 					 width)
1563 	      /* We can only remove the later store if the earlier store
1564 		 aliases at least all the accesses the later one makes.  */
1565 	      && ((MEM_ALIAS_SET (mem) == MEM_ALIAS_SET (s_info->mem)
1566 		   || alias_set_subset_of (MEM_ALIAS_SET (mem),
1567 					   MEM_ALIAS_SET (s_info->mem)))
1568 		  && (!MEM_EXPR (s_info->mem)
1569 		      || refs_same_for_tbaa_p (MEM_EXPR (s_info->mem),
1570 					       MEM_EXPR (mem)))))
1571 	    {
1572 	      if (GET_MODE (mem) == BLKmode)
1573 		{
1574 		  if (GET_MODE (s_info->mem) == BLKmode
1575 		      && s_info->const_rhs == const_rhs)
1576 		    redundant_reason = ptr;
1577 		}
1578 	      else if (s_info->const_rhs == const0_rtx
1579 		       && const_rhs == const0_rtx)
1580 		redundant_reason = ptr;
1581 	      else
1582 		{
1583 		  rtx val;
1584 		  start_sequence ();
1585 		  val = get_stored_val (s_info, GET_MODE (mem), offset, width,
1586 					BLOCK_FOR_INSN (insn_info->insn),
1587 					true);
1588 		  if (get_insns () != NULL)
1589 		    val = NULL_RTX;
1590 		  end_sequence ();
1591 		  if (val && rtx_equal_p (val, const_rhs))
1592 		    redundant_reason = ptr;
1593 		}
1594 	    }
1595 
1596 	  HOST_WIDE_INT begin_unneeded, const_s_width, const_width;
1597 	  if (known_subrange_p (s_info->offset, s_info->width, offset, width))
1598 	    /* The new store touches every byte that S_INFO does.  */
1599 	    set_all_positions_unneeded (s_info);
1600 	  else if ((offset - s_info->offset).is_constant (&begin_unneeded)
1601 		   && s_info->width.is_constant (&const_s_width)
1602 		   && width.is_constant (&const_width))
1603 	    {
1604 	      HOST_WIDE_INT end_unneeded = begin_unneeded + const_width;
1605 	      begin_unneeded = MAX (begin_unneeded, 0);
1606 	      end_unneeded = MIN (end_unneeded, const_s_width);
1607 	      for (i = begin_unneeded; i < end_unneeded; ++i)
1608 		set_position_unneeded (s_info, i);
1609 	    }
1610 	  else
1611 	    {
1612 	      /* We don't know which parts of S_INFO are needed and
1613 		 which aren't, so invalidate the RHS.  */
1614 	      s_info->rhs = NULL;
1615 	      s_info->const_rhs = NULL;
1616 	    }
1617 	}
1618       else if (s_info->rhs)
1619 	/* Need to see if it is possible for this store to overwrite
1620 	   the value of store_info.  If it is, set the rhs to NULL to
1621 	   keep it from being used to remove a load.  */
1622 	{
1623 	  if (canon_output_dependence (s_info->mem, true,
1624 				       mem, GET_MODE (mem),
1625 				       mem_addr))
1626 	    {
1627 	      s_info->rhs = NULL;
1628 	      s_info->const_rhs = NULL;
1629 	    }
1630 	}
1631 
1632       /* An insn can be deleted if every position of every one of
1633 	 its s_infos is unneeded.  */
1634       if (any_positions_needed_p (s_info))
1635 	del = false;
1636 
1637       if (del)
1638 	{
1639 	  insn_info_t insn_to_delete = ptr;
1640 
1641 	  active_local_stores_len--;
1642 	  if (last)
1643 	    last->next_local_store = ptr->next_local_store;
1644 	  else
1645 	    active_local_stores = ptr->next_local_store;
1646 
1647 	  if (!insn_to_delete->cannot_delete)
1648 	    delete_dead_store_insn (insn_to_delete);
1649 	}
1650       else
1651 	last = ptr;
1652 
1653       ptr = next;
1654     }
1655 
1656   /* Finish filling in the store_info.  */
1657   store_info->next = insn_info->store_rec;
1658   insn_info->store_rec = store_info;
1659   store_info->mem = mem;
1660   store_info->mem_addr = mem_addr;
1661   store_info->cse_base = base;
1662   HOST_WIDE_INT const_width;
1663   if (!width.is_constant (&const_width))
1664     {
1665       store_info->is_large = true;
1666       store_info->positions_needed.large.count = 0;
1667       store_info->positions_needed.large.bmap = NULL;
1668     }
1669   else if (const_width > HOST_BITS_PER_WIDE_INT)
1670     {
1671       store_info->is_large = true;
1672       store_info->positions_needed.large.count = 0;
1673       store_info->positions_needed.large.bmap = BITMAP_ALLOC (&dse_bitmap_obstack);
1674     }
1675   else
1676     {
1677       store_info->is_large = false;
1678       store_info->positions_needed.small_bitmask
1679 	= lowpart_bitmask (const_width);
1680     }
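  /* A small illustration of the three representations just chosen
     (a sketch, assuming HOST_BITS_PER_WIDE_INT == 64): a 4-byte store
     starts with positions_needed.small_bitmask == lowpart_bitmask (4)
     == 0xf, one bit per byte still needed; a 128-byte store is
     "large" and tracks bytes that become unneeded in the allocated
     bitmap; a store whose width is not compile-time constant is
     "large" with a null bitmap and is handled conservatively.  */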
1681   store_info->group_id = group_id;
1682   store_info->offset = offset;
1683   store_info->width = width;
1684   store_info->is_set = GET_CODE (body) == SET;
1685   store_info->rhs = rhs;
1686   store_info->const_rhs = const_rhs;
1687   store_info->redundant_reason = redundant_reason;
1688 
1689   /* If this is a clobber, we return 0.  We will only be able to
1690      delete this insn if there is only one USED store, but we can
1691      use the clobber to delete other stores earlier.  */
1692   return store_info->is_set ? 1 : 0;
1693 }
1694 
1695 
1696 static void
1697 dump_insn_info (const char * start, insn_info_t insn_info)
1698 {
1699   fprintf (dump_file, "%s insn=%d %s\n", start,
1700 	   INSN_UID (insn_info->insn),
1701 	   insn_info->store_rec ? "has store" : "naked");
1702 }
1703 
1704 
1705 /* If the modes are different and the value's source and target do not
1706    line up, we need to extract the value from the lower part of the rhs
1707    of the store, shift it, and then put it into a form that can be
1708    shoved into the read_insn.  This function right shifts a value that
1709    is at least ACCESS_SIZE bytes wide and extracts the result in
1710    READ_MODE.  The register holding the shifted value is returned, or
1711    NULL if we failed to find a cheap enough shift.  */
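
/* A worked example of the interface (a sketch, little-endian assumed):
   if a DImode store at offset 0 is later read as SImode at offset 4,
   get_stored_val computes a 4 byte gap and calls

     find_shift_sequence (8, store_info, SImode, 32, speed, false);

   i.e. shift the stored 64-bit value right by 32 bits and hand back
   the low SImode part of the result.  */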
1712 
1713 static rtx
1714 find_shift_sequence (poly_int64 access_size,
1715 		     store_info *store_info,
1716 		     machine_mode read_mode,
1717 		     poly_int64 shift, bool speed, bool require_cst)
1718 {
1719   machine_mode store_mode = GET_MODE (store_info->mem);
1720   scalar_int_mode new_mode;
1721   rtx read_reg = NULL;
1722 
1723   /* If a constant was stored into memory, try to simplify it here,
1724      otherwise the cost of the shift might preclude this optimization
1725      e.g. at -Os, even when no actual shift will be needed.  */
1726   if (store_info->const_rhs)
1727     {
1728       auto new_mode = smallest_int_mode_for_size (access_size * BITS_PER_UNIT);
1729       auto byte = subreg_lowpart_offset (new_mode, store_mode);
1730       rtx ret
1731 	= simplify_subreg (new_mode, store_info->const_rhs, store_mode, byte);
1732       if (ret && CONSTANT_P (ret))
1733 	{
1734 	  rtx shift_rtx = gen_int_shift_amount (new_mode, shift);
1735 	  ret = simplify_const_binary_operation (LSHIFTRT, new_mode, ret,
1736 						 shift_rtx);
1737 	  if (ret && CONSTANT_P (ret))
1738 	    {
1739 	      byte = subreg_lowpart_offset (read_mode, new_mode);
1740 	      ret = simplify_subreg (read_mode, ret, new_mode, byte);
1741 	      if (ret && CONSTANT_P (ret)
1742 		  && (set_src_cost (ret, read_mode, speed)
1743 		      <= COSTS_N_INSNS (1)))
1744 		return ret;
1745 	    }
1746 	}
1747     }
1748 
1749   if (require_cst)
1750     return NULL_RTX;
1751 
1752   /* Some machines like the x86 have shift insns for each size of
1753      operand.  Other machines like the ppc or the ia-64 may only have
1754      shift insns that shift values within 32 or 64 bit registers.
1755      This loop tries to find the smallest shift insn that will right
1756      justify the value we want to read but is available in one insn on
1757      the machine.  */
1758 
1759   opt_scalar_int_mode new_mode_iter;
1760   FOR_EACH_MODE_IN_CLASS (new_mode_iter, MODE_INT)
1761     {
1762       rtx target, new_reg, new_lhs;
1763       rtx_insn *shift_seq, *insn;
1764       int cost;
1765 
1766       new_mode = new_mode_iter.require ();
1767       if (GET_MODE_BITSIZE (new_mode) > BITS_PER_WORD)
1768 	break;
1769       if (maybe_lt (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (read_mode)))
1770 	continue;
1771 
1772       /* Try a wider mode if truncating the store mode to NEW_MODE
1773 	 requires a real instruction.  */
1774       if (maybe_lt (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (store_mode))
1775 	  && !TRULY_NOOP_TRUNCATION_MODES_P (new_mode, store_mode))
1776 	continue;
1777 
1778       /* Also try a wider mode if the necessary punning is either not
1779 	 desirable or not possible.  */
1780       if (!CONSTANT_P (store_info->rhs)
1781 	  && !targetm.modes_tieable_p (new_mode, store_mode))
1782 	continue;
1783 
1784       if (multiple_p (shift, GET_MODE_BITSIZE (new_mode))
1785 	  && known_le (GET_MODE_SIZE (new_mode), GET_MODE_SIZE (store_mode)))
1786 	{
1787 	  /* Try to implement the shift using a subreg.  */
1788 	  poly_int64 offset
1789 	    = subreg_offset_from_lsb (new_mode, store_mode, shift);
1790 	  rtx rhs_subreg = simplify_gen_subreg (new_mode, store_info->rhs,
1791 						store_mode, offset);
1792 	  if (rhs_subreg)
1793 	    {
1794 	      read_reg
1795 		= extract_low_bits (read_mode, new_mode, copy_rtx (rhs_subreg));
1796 	      break;
1797 	    }
1798 	}
1799 
1800       if (maybe_lt (GET_MODE_SIZE (new_mode), access_size))
1801 	continue;
1802 
1803       new_reg = gen_reg_rtx (new_mode);
1804 
1805       start_sequence ();
1806 
1807       /* In theory we could also check for an ashr.  Ian Taylor knows
1808 	 of one dsp where the cost of these two was not the same.  But
1809 	 this really is a rare case anyway.  */
1810       target = expand_binop (new_mode, lshr_optab, new_reg,
1811 			     gen_int_shift_amount (new_mode, shift),
1812 			     new_reg, 1, OPTAB_DIRECT);
1813 
1814       shift_seq = get_insns ();
1815       end_sequence ();
1816 
1817       if (target != new_reg || shift_seq == NULL)
1818 	continue;
1819 
1820       cost = 0;
1821       for (insn = shift_seq; insn != NULL_RTX; insn = NEXT_INSN (insn))
1822 	if (INSN_P (insn))
1823 	  cost += insn_cost (insn, speed);
1824 
1825       /* The computation up to here is essentially independent
1826 	 of the arguments and could be precomputed.  It may
1827 	 not be worth doing so.  We could precompute if
1828 	 worthwhile or at least cache the results.  The result
1829 	 technically depends on both SHIFT and ACCESS_SIZE,
1830 	 but in practice the answer will depend only on ACCESS_SIZE.  */
1831 
1832       if (cost > COSTS_N_INSNS (1))
1833 	continue;
1834 
1835       new_lhs = extract_low_bits (new_mode, store_mode,
1836 				  copy_rtx (store_info->rhs));
1837       if (new_lhs == NULL_RTX)
1838 	continue;
1839 
1840       /* We found an acceptable shift.  Generate a move to
1841 	 take the value from the store and put it into the
1842 	 shift pseudo, then shift it, then generate another
1843 	 move to put it into the target of the read.  */
1844       emit_move_insn (new_reg, new_lhs);
1845       emit_insn (shift_seq);
1846       read_reg = extract_low_bits (read_mode, new_mode, new_reg);
1847       break;
1848     }
1849 
1850   return read_reg;
1851 }
1852 
1853 
1854 /* Call back for note_stores to find the hard regs set or clobbered by
1855    insn.  Data is a bitmap of the hardregs set so far.  */
1856 
1857 static void
1858 look_for_hardregs (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
1859 {
1860   bitmap regs_set = (bitmap) data;
1861 
1862   if (REG_P (x)
1863       && HARD_REGISTER_P (x))
1864     bitmap_set_range (regs_set, REGNO (x), REG_NREGS (x));
1865 }
1866 
1867 /* Helper function for replace_read and record_store.
1868    Attempt to return a value of mode READ_MODE stored in STORE_INFO,
1869    consisting of READ_WIDTH bytes starting from READ_OFFSET.  Return NULL
1870    if not successful.  If REQUIRE_CST is true, only return a constant.  */
1871 
1872 static rtx
1873 get_stored_val (store_info *store_info, machine_mode read_mode,
1874 		poly_int64 read_offset, poly_int64 read_width,
1875 		basic_block bb, bool require_cst)
1876 {
1877   machine_mode store_mode = GET_MODE (store_info->mem);
1878   poly_int64 gap;
1879   rtx read_reg;
1880 
1881   /* To get here the read is within the boundaries of the write so
1882      shift will never be negative.  Start out with the shift being in
1883      bytes.  */
1884   if (store_mode == BLKmode)
1885     gap = 0;
1886   else if (BYTES_BIG_ENDIAN)
1887     gap = ((store_info->offset + store_info->width)
1888 	   - (read_offset + read_width));
1889   else
1890     gap = read_offset - store_info->offset;
1891 
1892   if (gap.is_constant () && maybe_ne (gap, 0))
1893     {
1894       poly_int64 shift = gap * BITS_PER_UNIT;
1895       poly_int64 access_size = GET_MODE_SIZE (read_mode) + gap;
1896       read_reg = find_shift_sequence (access_size, store_info, read_mode,
1897 				      shift, optimize_bb_for_speed_p (bb),
1898 				      require_cst);
1899     }
1900   else if (store_mode == BLKmode)
1901     {
1902       /* The store is a memset (addr, const_val, const_size).  */
1903       gcc_assert (CONST_INT_P (store_info->rhs));
1904       scalar_int_mode int_store_mode;
1905       if (!int_mode_for_mode (read_mode).exists (&int_store_mode))
1906 	read_reg = NULL_RTX;
1907       else if (store_info->rhs == const0_rtx)
1908 	read_reg = extract_low_bits (read_mode, int_store_mode, const0_rtx);
1909       else if (GET_MODE_BITSIZE (int_store_mode) > HOST_BITS_PER_WIDE_INT
1910 	       || BITS_PER_UNIT >= HOST_BITS_PER_WIDE_INT)
1911 	read_reg = NULL_RTX;
1912       else
1913 	{
1914 	  unsigned HOST_WIDE_INT c
1915 	    = INTVAL (store_info->rhs)
1916 	      & ((HOST_WIDE_INT_1 << BITS_PER_UNIT) - 1);
1917 	  int shift = BITS_PER_UNIT;
1918 	  while (shift < HOST_BITS_PER_WIDE_INT)
1919 	    {
1920 	      c |= (c << shift);
1921 	      shift <<= 1;
1922 	    }
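	  /* For instance (a sketch, with BITS_PER_UNIT == 8 and 64-bit
	     HOST_WIDE_INT): a memset value of 0xab is replicated as
	     0xab -> 0xabab -> 0xabababab -> 0xabababababababab,
	     doubling the filled width on each iteration.  */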
1923 	  read_reg = gen_int_mode (c, int_store_mode);
1924 	  read_reg = extract_low_bits (read_mode, int_store_mode, read_reg);
1925 	}
1926     }
1927   else if (store_info->const_rhs
1928 	   && (require_cst
1929 	       || GET_MODE_CLASS (read_mode) != GET_MODE_CLASS (store_mode)))
1930     read_reg = extract_low_bits (read_mode, store_mode,
1931 				 copy_rtx (store_info->const_rhs));
1932   else
1933     read_reg = extract_low_bits (read_mode, store_mode,
1934 				 copy_rtx (store_info->rhs));
1935   if (require_cst && read_reg && !CONSTANT_P (read_reg))
1936     read_reg = NULL_RTX;
1937   return read_reg;
1938 }
1939 
1940 /* Take a sequence of:
1941      A <- r1
1942      ...
1943      ... <- A
1944 
1945    and change it into
1946    r2 <- r1
1947    A <- r1
1948    ...
1949    ... <- r2
1950 
1951    or
1952 
1953    r3 <- extract (r1)
1954    r3 <- r3 >> shift
1955    r2 <- extract (r3)
1956    ... <- r2
1957 
1958    or
1959 
1960    r2 <- extract (r1)
1961    ... <- r2
1962 
1963    Depending on the alignment and the mode of the store and
1964    subsequent load.
1965 
1966 
1967    The STORE_INFO and STORE_INSN are for the store and READ_INFO
1968    and READ_INSN are for the read.  Return true if the replacement
1969    went ok.  */
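
/* As a concrete instance (a sketch): for an SImode store
   mem[sp+8] <- r1 followed later in the block by an SImode load
   r9 <- mem[sp+8], the first of the three shapes above applies: a
   fresh pseudo r2 is initialized from r1 just before the store, and
   the load is rewritten to read r2, leaving the store a candidate
   for later deletion.  */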
1970 
1971 static bool
1972 replace_read (store_info *store_info, insn_info_t store_insn,
1973 	      read_info_t read_info, insn_info_t read_insn, rtx *loc)
1974 {
1975   machine_mode store_mode = GET_MODE (store_info->mem);
1976   machine_mode read_mode = GET_MODE (read_info->mem);
1977   rtx_insn *insns, *this_insn;
1978   rtx read_reg;
1979   basic_block bb;
1980 
1981   if (!dbg_cnt (dse))
1982     return false;
1983 
1984   /* Create a sequence of instructions to set up the read register.
1985      This sequence goes immediately before the store and its result
1986      is read by the load.
1987 
1988      We need to keep this in perspective.  We are replacing a read
1989      with a sequence of insns, but the read will almost certainly be
1990      in cache, so it is not going to be an expensive one.  Thus, we
1991      are not willing to do a multi insn shift or worse a subroutine
1992      call to get rid of the read.  */
1993   if (dump_file && (dump_flags & TDF_DETAILS))
1994     fprintf (dump_file, "trying to replace %smode load in insn %d"
1995 	     " from %smode store in insn %d\n",
1996 	     GET_MODE_NAME (read_mode), INSN_UID (read_insn->insn),
1997 	     GET_MODE_NAME (store_mode), INSN_UID (store_insn->insn));
1998   start_sequence ();
1999   bb = BLOCK_FOR_INSN (read_insn->insn);
2000   read_reg = get_stored_val (store_info,
2001 			     read_mode, read_info->offset, read_info->width,
2002 			     bb, false);
2003   if (read_reg == NULL_RTX)
2004     {
2005       end_sequence ();
2006       if (dump_file && (dump_flags & TDF_DETAILS))
2007 	fprintf (dump_file, " -- could not extract bits of stored value\n");
2008       return false;
2009     }
2010   /* Force the value into a new register so that it won't be clobbered
2011      between the store and the load.  */
2012   read_reg = copy_to_mode_reg (read_mode, read_reg);
2013   insns = get_insns ();
2014   end_sequence ();
2015 
2016   if (insns != NULL_RTX)
2017     {
2018       /* Now we have to scan the set of new instructions to see if the
2019 	 sequence contains any sets of hardregs that happened to be
2020 	 live at this point.  For instance, this can happen if one of
2021 	 the insns sets the CC and the CC happened to be live at that
2022 	 point.  This does occasionally happen, see PR 37922.  */
2023       bitmap regs_set = BITMAP_ALLOC (&reg_obstack);
2024 
2025       for (this_insn = insns;
2026 	   this_insn != NULL_RTX; this_insn = NEXT_INSN (this_insn))
2027 	{
2028 	  if (insn_invalid_p (this_insn, false))
2029 	    {
2030 	      if (dump_file && (dump_flags & TDF_DETAILS))
2031 		{
2032 		  fprintf (dump_file, " -- replacing the loaded MEM with ");
2033 		  print_simple_rtl (dump_file, read_reg);
2034 		  fprintf (dump_file, " led to an invalid instruction\n");
2035 		}
2036 	      BITMAP_FREE (regs_set);
2037 	      return false;
2038 	    }
2039 	  note_stores (this_insn, look_for_hardregs, regs_set);
2040 	}
2041 
2042       if (store_insn->fixed_regs_live)
2043 	bitmap_and_into (regs_set, store_insn->fixed_regs_live);
2044       if (!bitmap_empty_p (regs_set))
2045 	{
2046 	  if (dump_file && (dump_flags & TDF_DETAILS))
2047 	    {
2048 	      fprintf (dump_file, "abandoning replacement because sequence "
2049 				  "clobbers live hardregs:");
2050 	      df_print_regset (dump_file, regs_set);
2051 	    }
2052 
2053 	  BITMAP_FREE (regs_set);
2054 	  return false;
2055 	}
2056       BITMAP_FREE (regs_set);
2057     }
2058 
2059   subrtx_iterator::array_type array;
2060   FOR_EACH_SUBRTX (iter, array, *loc, NONCONST)
2061     {
2062       const_rtx x = *iter;
2063       if (GET_RTX_CLASS (GET_CODE (x)) == RTX_AUTOINC)
2064 	{
2065 	  if (dump_file && (dump_flags & TDF_DETAILS))
2066 	    fprintf (dump_file, " -- replacing the MEM failed due to address "
2067 				"side-effects\n");
2068 	  return false;
2069 	}
2070     }
2071 
2072   if (validate_change (read_insn->insn, loc, read_reg, 0))
2073     {
2074       deferred_change *change = deferred_change_pool.allocate ();
2075 
2076       /* Insert this right before the store insn where it will be safe
2077 	 from later insns that might change it before the read.  */
2078       emit_insn_before (insns, store_insn->insn);
2079 
2080       /* And now for the kludge part: cselib croaks if you just
2081 	 return at this point.  There are two reasons for this:
2082 
2083 	 1) Cselib has an idea of how many pseudos there are and
2084 	 that does not include the new ones we just added.
2085 
2086 	 2) Cselib does not know about the move insn we added
2087 	 above the store_info, and there is no way to tell it
2088 	 about it, because it has "moved on".
2089 
2090 	 Problem (1) is fixable with a certain amount of engineering.
2091 	 Problem (2) requires starting the bb from scratch.  This
2092 	 could be expensive.
2093 
2094 	 So we are just going to have to lie.  The move/extraction
2095 	 insns are not really an issue, cselib did not see them.  But
2096 	 the use of the new pseudo read_insn is a real problem because
2097 	 cselib has not scanned this insn.  The way that we solve this
2098 	 problem is that we are just going to put the mem back for now
2099 	 and when we are finished with the block, we undo this.  We
2100 	 keep a table of mems to get rid of.  At the end of the basic
2101 	 block we can put them back.  */
2102 
2103       *loc = read_info->mem;
2104       change->next = deferred_change_list;
2105       deferred_change_list = change;
2106       change->loc = loc;
2107       change->reg = read_reg;
2108 
2109       /* Get rid of the read_info, from the point of view of the
2110 	 rest of dse, play like this read never happened.  */
2111       read_insn->read_rec = read_info->next;
2112       read_info_type_pool.remove (read_info);
2113       if (dump_file && (dump_flags & TDF_DETAILS))
2114 	{
2115 	  fprintf (dump_file, " -- replaced the loaded MEM with ");
2116 	  print_simple_rtl (dump_file, read_reg);
2117 	  fprintf (dump_file, "\n");
2118 	}
2119       return true;
2120     }
2121   else
2122     {
2123       if (dump_file && (dump_flags & TDF_DETAILS))
2124 	{
2125 	  fprintf (dump_file, " -- replacing the loaded MEM with ");
2126 	  print_simple_rtl (dump_file, read_reg);
2127 	  fprintf (dump_file, " led to an invalid instruction\n");
2128 	}
2129       return false;
2130     }
2131 }
2132 
2133 /* Check the address of MEM *LOC and kill any appropriate stores that may
2134    be active.  */
2135 
2136 static void
2137 check_mem_read_rtx (rtx *loc, bb_info_t bb_info)
2138 {
2139   rtx mem = *loc, mem_addr;
2140   insn_info_t insn_info;
2141   poly_int64 offset = 0;
2142   poly_int64 width = 0;
2143   cselib_val *base = NULL;
2144   int group_id;
2145   read_info_t read_info;
2146 
2147   insn_info = bb_info->last_insn;
2148 
2149   if ((MEM_ALIAS_SET (mem) == ALIAS_SET_MEMORY_BARRIER)
2150       || MEM_VOLATILE_P (mem))
2151     {
2152       if (crtl->stack_protect_guard
2153 	  && (MEM_EXPR (mem) == crtl->stack_protect_guard
2154 	      || (crtl->stack_protect_guard_decl
2155 		  && MEM_EXPR (mem) == crtl->stack_protect_guard_decl))
2156 	  && MEM_VOLATILE_P (mem))
2157 	{
2158 	  /* This is either the stack protector canary on the stack,
2159 	     which ought to be written by a MEM_VOLATILE_P store and
2160 	     thus shouldn't be deleted and is read at the very end of
2161 	     the function, but shouldn't conflict with any other store.
2162 	     Or it is __stack_chk_guard variable or TLS or whatever else
2163 	     MEM holding the canary value, which really shouldn't be
2164 	     ever modified in -fstack-protector* protected functions,
2165 	     otherwise the prologue store wouldn't match the epilogue
2166 	     check.  */
2167 	  if (dump_file && (dump_flags & TDF_DETAILS))
2168 	    fprintf (dump_file, " stack protector canary read ignored.\n");
2169 	  insn_info->cannot_delete = true;
2170 	  return;
2171 	}
2172 
2173       if (dump_file && (dump_flags & TDF_DETAILS))
2174 	fprintf (dump_file, " adding wild read, volatile or barrier.\n");
2175       add_wild_read (bb_info);
2176       insn_info->cannot_delete = true;
2177       return;
2178     }
2179 
2180   /* If it is reading readonly mem, then there can be no conflict with
2181      another write. */
2182   if (MEM_READONLY_P (mem))
2183     return;
2184 
2185   if (!canon_address (mem, &group_id, &offset, &base))
2186     {
2187       if (dump_file && (dump_flags & TDF_DETAILS))
2188 	fprintf (dump_file, " adding wild read, canon_address failure.\n");
2189       add_wild_read (bb_info);
2190       return;
2191     }
2192 
2193   if (GET_MODE (mem) == BLKmode)
2194     width = -1;
2195   else
2196     width = GET_MODE_SIZE (GET_MODE (mem));
2197 
2198   if (!endpoint_representable_p (offset, known_eq (width, -1) ? 1 : width))
2199     {
2200       if (dump_file && (dump_flags & TDF_DETAILS))
2201 	fprintf (dump_file, " adding wild read, due to overflow.\n");
2202       add_wild_read (bb_info);
2203       return;
2204     }
2205 
2206   read_info = read_info_type_pool.allocate ();
2207   read_info->group_id = group_id;
2208   read_info->mem = mem;
2209   read_info->offset = offset;
2210   read_info->width = width;
2211   read_info->next = insn_info->read_rec;
2212   insn_info->read_rec = read_info;
2213   if (group_id < 0)
2214     mem_addr = base->val_rtx;
2215   else
2216     {
2217       group_info *group = rtx_group_vec[group_id];
2218       mem_addr = group->canon_base_addr;
2219     }
2220   if (maybe_ne (offset, 0))
2221     mem_addr = plus_constant (get_address_mode (mem), mem_addr, offset);
2222   /* Avoid passing VALUE RTXen as mem_addr to canon_true_dependence
2223      which will over and over re-create proper RTL and re-apply the
2224      offset above.  See PR80960 where we almost allocate 1.6GB of PLUS
2225      RTXen that way.  */
2226   mem_addr = get_addr (mem_addr);
2227 
2228   if (group_id >= 0)
2229     {
2230       /* This is the restricted case where the base is a constant or
2231 	 the frame pointer and offset is a constant.  */
2232       insn_info_t i_ptr = active_local_stores;
2233       insn_info_t last = NULL;
2234 
2235       if (dump_file && (dump_flags & TDF_DETAILS))
2236 	{
2237 	  if (!known_size_p (width))
2238 	    fprintf (dump_file, " processing const load gid=%d[BLK]\n",
2239 		     group_id);
2240 	  else
2241 	    {
2242 	      fprintf (dump_file, " processing const load gid=%d", group_id);
2243 	      print_range (dump_file, offset, width);
2244 	      fprintf (dump_file, "\n");
2245 	    }
2246 	}
2247 
2248       while (i_ptr)
2249 	{
2250 	  bool remove = false;
2251 	  store_info *store_info = i_ptr->store_rec;
2252 
2253 	  /* Skip the clobbers.  */
2254 	  while (!store_info->is_set)
2255 	    store_info = store_info->next;
2256 
2257 	  /* There are three cases here.  */
2258 	  if (store_info->group_id < 0)
2259 	    /* We have a cselib store followed by a read from a
2260 	       const base. */
2261 	    remove
2262 	      = canon_true_dependence (store_info->mem,
2263 				       GET_MODE (store_info->mem),
2264 				       store_info->mem_addr,
2265 				       mem, mem_addr);
2266 
2267 	  else if (group_id == store_info->group_id)
2268 	    {
2269 	      /* This is a block mode load.  We may get lucky and
2270 		 canon_true_dependence may save the day.  */
2271 	      if (!known_size_p (width))
2272 		remove
2273 		  = canon_true_dependence (store_info->mem,
2274 					   GET_MODE (store_info->mem),
2275 					   store_info->mem_addr,
2276 					   mem, mem_addr);
2277 
2278 	      /* If this read is just reading back something that we just
2279 		 stored, rewrite the read.  */
2280 	      else
2281 		{
2282 		  if (store_info->rhs
2283 		      && known_subrange_p (offset, width, store_info->offset,
2284 					   store_info->width)
2285 		      && all_positions_needed_p (store_info,
2286 						 offset - store_info->offset,
2287 						 width)
2288 		      && replace_read (store_info, i_ptr, read_info,
2289 				       insn_info, loc))
2290 		    return;
2291 
2292 		  /* The bases are the same, just see if the offsets
2293 		     could overlap.  */
2294 		  if (ranges_maybe_overlap_p (offset, width,
2295 					      store_info->offset,
2296 					      store_info->width))
2297 		    remove = true;
2298 		}
2299 	    }
2300 
2301 	  /* else
2302 	     The else case that is missing here is that the
2303 	     bases are constant but different.  There is nothing
2304 	     to do here because there is no overlap.  */
2305 
2306 	  if (remove)
2307 	    {
2308 	      if (dump_file && (dump_flags & TDF_DETAILS))
2309 		dump_insn_info ("removing from active", i_ptr);
2310 
2311 	      active_local_stores_len--;
2312 	      if (last)
2313 		last->next_local_store = i_ptr->next_local_store;
2314 	      else
2315 		active_local_stores = i_ptr->next_local_store;
2316 	    }
2317 	  else
2318 	    last = i_ptr;
2319 	  i_ptr = i_ptr->next_local_store;
2320 	}
2321     }
2322   else
2323     {
2324       insn_info_t i_ptr = active_local_stores;
2325       insn_info_t last = NULL;
2326       if (dump_file && (dump_flags & TDF_DETAILS))
2327 	{
2328 	  fprintf (dump_file, " processing cselib load mem:");
2329 	  print_inline_rtx (dump_file, mem, 0);
2330 	  fprintf (dump_file, "\n");
2331 	}
2332 
2333       while (i_ptr)
2334 	{
2335 	  bool remove = false;
2336 	  store_info *store_info = i_ptr->store_rec;
2337 
2338 	  if (dump_file && (dump_flags & TDF_DETAILS))
2339 	    fprintf (dump_file, " processing cselib load against insn %d\n",
2340 		     INSN_UID (i_ptr->insn));
2341 
2342 	  /* Skip the clobbers.  */
2343 	  while (!store_info->is_set)
2344 	    store_info = store_info->next;
2345 
2346 	  /* If this read is just reading back something that we just
2347 	     stored, rewrite the read.  */
2348 	  if (store_info->rhs
2349 	      && store_info->group_id == -1
2350 	      && store_info->cse_base == base
2351 	      && known_subrange_p (offset, width, store_info->offset,
2352 				   store_info->width)
2353 	      && all_positions_needed_p (store_info,
2354 					 offset - store_info->offset, width)
2355 	      && replace_read (store_info, i_ptr,  read_info, insn_info, loc))
2356 	    return;
2357 
2358 	  remove = canon_true_dependence (store_info->mem,
2359 					  GET_MODE (store_info->mem),
2360 					  store_info->mem_addr,
2361 					  mem, mem_addr);
2362 
2363 	  if (remove)
2364 	    {
2365 	      if (dump_file && (dump_flags & TDF_DETAILS))
2366 		dump_insn_info ("removing from active", i_ptr);
2367 
2368 	      active_local_stores_len--;
2369 	      if (last)
2370 		last->next_local_store = i_ptr->next_local_store;
2371 	      else
2372 		active_local_stores = i_ptr->next_local_store;
2373 	    }
2374 	  else
2375 	    last = i_ptr;
2376 	  i_ptr = i_ptr->next_local_store;
2377 	}
2378     }
2379 }
2380 
2381 /* A note_uses callback in which DATA points to the BB_INFO that is
2382    passed on to check_mem_read_rtx.  Call check_mem_read_rtx for
2383    every MEM found in any part of *LOC.  */
2384 
2385 static void
2386 check_mem_read_use (rtx *loc, void *data)
2387 {
2388   subrtx_ptr_iterator::array_type array;
2389   FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
2390     {
2391       rtx *loc = *iter;
2392       if (MEM_P (*loc))
2393 	check_mem_read_rtx (loc, (bb_info_t) data);
2394     }
2395 }
2396 
2397 
2398 /* Get arguments passed to CALL_INSN.  Return TRUE if successful.
2399    So far it only handles arguments passed in registers.  */
2400 
2401 static bool
2402 get_call_args (rtx call_insn, tree fn, rtx *args, int nargs)
2403 {
2404   CUMULATIVE_ARGS args_so_far_v;
2405   cumulative_args_t args_so_far;
2406   tree arg;
2407   int idx;
2408 
2409   INIT_CUMULATIVE_ARGS (args_so_far_v, TREE_TYPE (fn), NULL_RTX, 0, 3);
2410   args_so_far = pack_cumulative_args (&args_so_far_v);
2411 
2412   arg = TYPE_ARG_TYPES (TREE_TYPE (fn));
2413   for (idx = 0;
2414        arg != void_list_node && idx < nargs;
2415        arg = TREE_CHAIN (arg), idx++)
2416     {
2417       scalar_int_mode mode;
2418       rtx reg, link, tmp;
2419 
2420       if (!is_int_mode (TYPE_MODE (TREE_VALUE (arg)), &mode))
2421 	return false;
2422 
2423       function_arg_info arg (mode, /*named=*/true);
2424       reg = targetm.calls.function_arg (args_so_far, arg);
2425       if (!reg || !REG_P (reg) || GET_MODE (reg) != mode)
2426 	return false;
2427 
2428       for (link = CALL_INSN_FUNCTION_USAGE (call_insn);
2429 	   link;
2430 	   link = XEXP (link, 1))
2431 	if (GET_CODE (XEXP (link, 0)) == USE)
2432 	  {
2433 	    scalar_int_mode arg_mode;
2434 	    args[idx] = XEXP (XEXP (link, 0), 0);
2435 	    if (REG_P (args[idx])
2436 		&& REGNO (args[idx]) == REGNO (reg)
2437 		&& (GET_MODE (args[idx]) == mode
2438 		    || (is_int_mode (GET_MODE (args[idx]), &arg_mode)
2439 			&& (GET_MODE_SIZE (arg_mode) <= UNITS_PER_WORD)
2440 			&& (GET_MODE_SIZE (arg_mode) > GET_MODE_SIZE (mode)))))
2441 	      break;
2442 	  }
2443       if (!link)
2444 	return false;
2445 
2446       tmp = cselib_expand_value_rtx (args[idx], scratch, 5);
2447       if (GET_MODE (args[idx]) != mode)
2448 	{
2449 	  if (!tmp || !CONST_INT_P (tmp))
2450 	    return false;
2451 	  tmp = gen_int_mode (INTVAL (tmp), mode);
2452 	}
2453       if (tmp)
2454 	args[idx] = tmp;
2455 
2456       targetm.calls.function_arg_advance (args_so_far, arg);
2457     }
2458   if (arg != void_list_node || idx != nargs)
2459     return false;
2460   return true;
2461 }
2462 
2463 /* Return a bitmap of the fixed registers contained in IN.  */
2464 
2465 static bitmap
2466 copy_fixed_regs (const_bitmap in)
2467 {
2468   bitmap ret;
2469 
2470   ret = ALLOC_REG_SET (NULL);
2471   bitmap_and (ret, in, bitmap_view<HARD_REG_SET> (fixed_reg_set));
2472   return ret;
2473 }
2474 
2475 /* Apply record_store to all candidate stores in INSN.  Mark INSN
2476    as not deletable if some part of it is not a candidate store and
2477    assigns to a non-register target.  */
2478 
2479 static void
2480 scan_insn (bb_info_t bb_info, rtx_insn *insn, int max_active_local_stores)
2481 {
2482   rtx body;
2483   insn_info_type *insn_info = insn_info_type_pool.allocate ();
2484   int mems_found = 0;
2485   memset (insn_info, 0, sizeof (struct insn_info_type));
2486 
2487   if (dump_file && (dump_flags & TDF_DETAILS))
2488     fprintf (dump_file, "\n**scanning insn=%d\n",
2489 	     INSN_UID (insn));
2490 
2491   insn_info->prev_insn = bb_info->last_insn;
2492   insn_info->insn = insn;
2493   bb_info->last_insn = insn_info;
2494 
2495   if (DEBUG_INSN_P (insn))
2496     {
2497       insn_info->cannot_delete = true;
2498       return;
2499     }
2500 
2501   /* Look at all of the uses in the insn.  */
2502   note_uses (&PATTERN (insn), check_mem_read_use, bb_info);
2503 
2504   if (CALL_P (insn))
2505     {
2506       bool const_call;
2507       rtx call, sym;
2508       tree memset_call = NULL_TREE;
2509 
2510       insn_info->cannot_delete = true;
2511 
2512       /* Const functions cannot do anything bad, i.e. read memory;
2513 	 however, they can read their parameters, which may have
2514 	 been pushed onto the stack.
2515 	 memset and bzero don't read memory either.  */
2516       const_call = RTL_CONST_CALL_P (insn);
2517       if (!const_call
2518 	  && (call = get_call_rtx_from (insn))
2519 	  && (sym = XEXP (XEXP (call, 0), 0))
2520 	  && GET_CODE (sym) == SYMBOL_REF
2521 	  && SYMBOL_REF_DECL (sym)
2522 	  && TREE_CODE (SYMBOL_REF_DECL (sym)) == FUNCTION_DECL
2523 	  && fndecl_built_in_p (SYMBOL_REF_DECL (sym), BUILT_IN_MEMSET))
2524 	memset_call = SYMBOL_REF_DECL (sym);
2525 
2526       if (const_call || memset_call)
2527 	{
2528 	  insn_info_t i_ptr = active_local_stores;
2529 	  insn_info_t last = NULL;
2530 
2531 	  if (dump_file && (dump_flags & TDF_DETAILS))
2532 	    fprintf (dump_file, "%s call %d\n",
2533 		     const_call ? "const" : "memset", INSN_UID (insn));
2534 
2535 	  /* See the head comment of the frame_read field.  */
2536 	  if (reload_completed
2537 	      /* Tail calls store their arguments using the
2538 		 arg pointer.  If it is a frame pointer on the target,
2539 		 even before reload we need to kill frame pointer based
2540 		 stores.  */
2541 	      || (SIBLING_CALL_P (insn)
2542 		  && HARD_FRAME_POINTER_IS_ARG_POINTER))
2543 	    insn_info->frame_read = true;
2544 
2545 	  /* Loop over the active stores and remove those which are
2546 	     killed by the const function call.  */
2547 	  while (i_ptr)
2548 	    {
2549 	      bool remove_store = false;
2550 
2551 	      /* The stack pointer based stores are always killed.  */
2552 	      if (i_ptr->stack_pointer_based)
2553 	        remove_store = true;
2554 
2555 	      /* If the frame is read, the frame related stores are killed.  */
2556 	      else if (insn_info->frame_read)
2557 		{
2558 		  store_info *store_info = i_ptr->store_rec;
2559 
2560 		  /* Skip the clobbers.  */
2561 		  while (!store_info->is_set)
2562 		    store_info = store_info->next;
2563 
2564 		  if (store_info->group_id >= 0
2565 		      && rtx_group_vec[store_info->group_id]->frame_related)
2566 		    remove_store = true;
2567 		}
2568 
2569 	      if (remove_store)
2570 		{
2571 		  if (dump_file && (dump_flags & TDF_DETAILS))
2572 		    dump_insn_info ("removing from active", i_ptr);
2573 
2574 		  active_local_stores_len--;
2575 		  if (last)
2576 		    last->next_local_store = i_ptr->next_local_store;
2577 		  else
2578 		    active_local_stores = i_ptr->next_local_store;
2579 		}
2580 	      else
2581 		last = i_ptr;
2582 
2583 	      i_ptr = i_ptr->next_local_store;
2584 	    }
2585 
2586 	  if (memset_call)
2587 	    {
2588 	      rtx args[3];
2589 	      if (get_call_args (insn, memset_call, args, 3)
2590 		  && CONST_INT_P (args[1])
2591 		  && CONST_INT_P (args[2])
2592 		  && INTVAL (args[2]) > 0)
2593 		{
2594 		  rtx mem = gen_rtx_MEM (BLKmode, args[0]);
2595 		  set_mem_size (mem, INTVAL (args[2]));
2596 		  body = gen_rtx_SET (mem, args[1]);
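		  /* E.g. (a sketch): a call equivalent to
		     memset (p, 0, 16) is modelled from here on as
		     (set (mem:BLK p) (const_int 0)) with MEM_SIZE 16,
		     so record_store can treat it like any other
		     store.  */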
2597 		  mems_found += record_store (body, bb_info);
2598 		  if (dump_file && (dump_flags & TDF_DETAILS))
2599 		    fprintf (dump_file, "handling memset as BLKmode store\n");
2600 		  if (mems_found == 1)
2601 		    {
2602 		      if (active_local_stores_len++ >= max_active_local_stores)
2603 			{
2604 			  active_local_stores_len = 1;
2605 			  active_local_stores = NULL;
2606 			}
2607 		      insn_info->fixed_regs_live
2608 			= copy_fixed_regs (bb_info->regs_live);
2609 		      insn_info->next_local_store = active_local_stores;
2610 		      active_local_stores = insn_info;
2611 		    }
2612 		}
2613 	      else
2614 		clear_rhs_from_active_local_stores ();
2615 	    }
2616 	}
2617       else if (SIBLING_CALL_P (insn)
2618 	       && (reload_completed || HARD_FRAME_POINTER_IS_ARG_POINTER))
2619 	/* Arguments for a sibling call that are pushed to memory are passed
2620 	   using the incoming argument pointer of the current function.  After
2621 	   reload that might be (and likely is) frame pointer based.  And, if
2622 	   it is a frame pointer on the target, even before reload we need to
2623 	   kill frame pointer based stores.  */
2624 	add_wild_read (bb_info);
2625       else
2626 	/* Every other call, including pure functions, may read any memory
2627            that is not relative to the frame.  */
2628         add_non_frame_wild_read (bb_info);
2629 
2630       return;
2631     }
2632 
2633   /* Assuming that there are sets in these insns, we cannot delete
2634      them.  */
2635   if ((GET_CODE (PATTERN (insn)) == CLOBBER)
2636       || volatile_refs_p (PATTERN (insn))
2637       || (!cfun->can_delete_dead_exceptions && !insn_nothrow_p (insn))
2638       || (RTX_FRAME_RELATED_P (insn))
2639       || find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX))
2640     insn_info->cannot_delete = true;
2641 
2642   body = PATTERN (insn);
2643   if (GET_CODE (body) == PARALLEL)
2644     {
2645       int i;
2646       for (i = 0; i < XVECLEN (body, 0); i++)
2647 	mems_found += record_store (XVECEXP (body, 0, i), bb_info);
2648     }
2649   else
2650     mems_found += record_store (body, bb_info);
2651 
2652   if (dump_file && (dump_flags & TDF_DETAILS))
2653     fprintf (dump_file, "mems_found = %d, cannot_delete = %s\n",
2654 	     mems_found, insn_info->cannot_delete ? "true" : "false");
2655 
2656   /* If we found some sets of mems, add it into the active_local_stores so
2657      that it can be locally deleted if found dead or used for
2658      replace_read and redundant constant store elimination.  Otherwise mark
2659      it as cannot delete.  This simplifies the processing later.  */
2660   if (mems_found == 1)
2661     {
2662       if (active_local_stores_len++ >= max_active_local_stores)
2663 	{
2664 	  active_local_stores_len = 1;
2665 	  active_local_stores = NULL;
2666 	}
2667       insn_info->fixed_regs_live = copy_fixed_regs (bb_info->regs_live);
2668       insn_info->next_local_store = active_local_stores;
2669       active_local_stores = insn_info;
2670     }
2671   else
2672     insn_info->cannot_delete = true;
2673 }
2674 
2675 
2676 /* Remove BASE from the set of active_local_stores.  This is a
2677    callback from cselib that is used to get rid of the stores in
2678    active_local_stores.  */
2679 
2680 static void
2681 remove_useless_values (cselib_val *base)
2682 {
2683   insn_info_t insn_info = active_local_stores;
2684   insn_info_t last = NULL;
2685 
2686   while (insn_info)
2687     {
2688       store_info *store_info = insn_info->store_rec;
2689       bool del = false;
2690 
2691       /* If ANY of the store_infos match the cselib group that is
2692 	 being deleted, then the insn cannot be deleted.  */
2693       while (store_info)
2694 	{
2695 	  if ((store_info->group_id == -1)
2696 	      && (store_info->cse_base == base))
2697 	    {
2698 	      del = true;
2699 	      break;
2700 	    }
2701 	  store_info = store_info->next;
2702 	}
2703 
2704       if (del)
2705 	{
2706 	  active_local_stores_len--;
2707 	  if (last)
2708 	    last->next_local_store = insn_info->next_local_store;
2709 	  else
2710 	    active_local_stores = insn_info->next_local_store;
2711 	  free_store_info (insn_info);
2712 	}
2713       else
2714 	last = insn_info;
2715 
2716       insn_info = insn_info->next_local_store;
2717     }
2718 }
2719 
2720 
2721 /* Do all of step 1.  */
2722 
2723 static void
2724 dse_step1 (void)
2725 {
2726   basic_block bb;
2727   bitmap regs_live = BITMAP_ALLOC (&reg_obstack);
2728 
2729   cselib_init (0);
2730   all_blocks = BITMAP_ALLOC (NULL);
2731   bitmap_set_bit (all_blocks, ENTRY_BLOCK);
2732   bitmap_set_bit (all_blocks, EXIT_BLOCK);
2733 
2734   /* For -O1 reduce the maximum number of active local stores for RTL DSE
2735      since this can consume huge amounts of memory (PR89115).  */
2736   int max_active_local_stores = param_max_dse_active_local_stores;
2737   if (optimize < 2)
2738     max_active_local_stores /= 10;
2739 
2740   FOR_ALL_BB_FN (bb, cfun)
2741     {
2742       insn_info_t ptr;
2743       bb_info_t bb_info = dse_bb_info_type_pool.allocate ();
2744 
2745       memset (bb_info, 0, sizeof (dse_bb_info_type));
2746       bitmap_set_bit (all_blocks, bb->index);
2747       bb_info->regs_live = regs_live;
2748 
2749       bitmap_copy (regs_live, DF_LR_IN (bb));
2750       df_simulate_initialize_forwards (bb, regs_live);
2751 
2752       bb_table[bb->index] = bb_info;
2753       cselib_discard_hook = remove_useless_values;
2754 
2755       if (bb->index >= NUM_FIXED_BLOCKS)
2756 	{
2757 	  rtx_insn *insn;
2758 
2759 	  active_local_stores = NULL;
2760 	  active_local_stores_len = 0;
2761 	  cselib_clear_table ();
2762 
2763 	  /* Scan the insns.  */
2764 	  FOR_BB_INSNS (bb, insn)
2765 	    {
2766 	      if (INSN_P (insn))
2767 		scan_insn (bb_info, insn, max_active_local_stores);
2768 	      cselib_process_insn (insn);
2769 	      if (INSN_P (insn))
2770 		df_simulate_one_insn_forwards (bb, insn, regs_live);
2771 	    }
2772 
2773 	  /* This is something of a hack, because the global algorithm
2774 	     is supposed to take care of the case where stores go dead
2775 	     at the end of the function.  However, the global
2776 	     algorithm must take a more conservative view of block
2777 	     mode reads than the local alg does.  So to get the case
2778 	     where you have a store to the frame followed by a non
2779 	     overlapping block mode read, we look at the active local
2780 	     stores at the end of the function and delete all of the
2781 	     frame and spill based ones.  */
2782 	  if (stores_off_frame_dead_at_return
2783 	      && (EDGE_COUNT (bb->succs) == 0
2784 		  || (single_succ_p (bb)
2785 		      && single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (cfun)
2786 		      && ! crtl->calls_eh_return)))
2787 	    {
2788 	      insn_info_t i_ptr = active_local_stores;
2789 	      while (i_ptr)
2790 		{
2791 		  store_info *store_info = i_ptr->store_rec;
2792 
2793 		  /* Skip the clobbers.  */
2794 		  while (!store_info->is_set)
2795 		    store_info = store_info->next;
2796 		  if (store_info->group_id >= 0)
2797 		    {
2798 		      group_info *group = rtx_group_vec[store_info->group_id];
2799 		      if (group->frame_related && !i_ptr->cannot_delete)
2800 			delete_dead_store_insn (i_ptr);
2801 		    }
2802 
2803 		  i_ptr = i_ptr->next_local_store;
2804 		}
2805 	    }
2806 
2807 	  /* Get rid of the loads that were discovered in
2808 	     replace_read.  Cselib is finished with this block.  */
2809 	  while (deferred_change_list)
2810 	    {
2811 	      deferred_change *next = deferred_change_list->next;
2812 
2813 	      /* There is no reason to validate this change.  That was
2814 		 done earlier.  */
2815 	      *deferred_change_list->loc = deferred_change_list->reg;
2816 	      deferred_change_pool.remove (deferred_change_list);
2817 	      deferred_change_list = next;
2818 	    }
2819 
2820 	  /* Get rid of all of the cselib based store_infos in this
2821 	     block and mark the containing insns as not being
2822 	     deletable.  */
2823 	  ptr = bb_info->last_insn;
2824 	  while (ptr)
2825 	    {
2826 	      if (ptr->contains_cselib_groups)
2827 		{
2828 		  store_info *s_info = ptr->store_rec;
2829 		  while (s_info && !s_info->is_set)
2830 		    s_info = s_info->next;
2831 		  if (s_info
2832 		      && s_info->redundant_reason
2833 		      && s_info->redundant_reason->insn
2834 		      && !ptr->cannot_delete)
2835 		    {
2836 		      if (dump_file && (dump_flags & TDF_DETAILS))
2837 			fprintf (dump_file, "Locally deleting insn %d "
2838 					    "because insn %d stores the "
2839 					    "same value and couldn't be "
2840 					    "eliminated\n",
2841 				 INSN_UID (ptr->insn),
2842 				 INSN_UID (s_info->redundant_reason->insn));
2843 		      delete_dead_store_insn (ptr);
2844 		    }
2845 		  free_store_info (ptr);
2846 		}
2847 	      else
2848 		{
2849 		  store_info *s_info;
2850 
2851 		  /* Free at least positions_needed bitmaps.  */
2852 		  for (s_info = ptr->store_rec; s_info; s_info = s_info->next)
2853 		    if (s_info->is_large)
2854 		      {
2855 			BITMAP_FREE (s_info->positions_needed.large.bmap);
2856 			s_info->is_large = false;
2857 		      }
2858 		}
2859 	      ptr = ptr->prev_insn;
2860 	    }
2861 
2862 	  cse_store_info_pool.release ();
2863 	}
2864       bb_info->regs_live = NULL;
2865     }
2866 
2867   BITMAP_FREE (regs_live);
2868   cselib_finish ();
2869   rtx_group_table->empty ();
2870 }
2871 
2872 
2873 /*----------------------------------------------------------------------------
2874    Second step.
2875 
2876    Assign each byte position in the stores that we are going to
2877    analyze globally to a position in the bitmaps.  Returns true if
2878    there are any bit positions assigned.
2879 ----------------------------------------------------------------------------*/
2880 
2881 static void
2882 dse_step2_init (void)
2883 {
2884   unsigned int i;
2885   group_info *group;
2886 
2887   FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
2888     {
2889       /* For all non stack related bases, we only consider a store to
2890 	 be deletable if there are two or more stores for that
2891 	 position.  This is because it takes one store to make the
2892 	 other store redundant.  However, for the stores that are
2893 	 stack related, we consider them if there is only one store
2894 	 for the position.  We do this because the stack related
2895 	 stores can be deleted if there is no read between them and
2896 	 the end of the function.
2897 
2898 	 To make this work in the current framework, we take the stack
2899 	 related bases and add all of the bits from store1 into store2.
2900 	 This has the effect of making them eligible even if there is
2901 	 only one store.   */
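
      /* A sketch of the effect: if a frame related group has a single
	 store covering byte 8, bit 8 is set only in store1_p; the IORs
	 below copy it into store2_p, so that byte still gets a global
	 bitmap position in dse_step2 even though no second store
	 exists.  */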
2902 
2903       if (stores_off_frame_dead_at_return && group->frame_related)
2904 	{
2905 	  bitmap_ior_into (group->store2_n, group->store1_n);
2906 	  bitmap_ior_into (group->store2_p, group->store1_p);
2907 	  if (dump_file && (dump_flags & TDF_DETAILS))
2908 	    fprintf (dump_file, "group %d is frame related ", i);
2909 	}
2910 
2911       group->offset_map_size_n++;
2912       group->offset_map_n = XOBNEWVEC (&dse_obstack, int,
2913 				       group->offset_map_size_n);
2914       group->offset_map_size_p++;
2915       group->offset_map_p = XOBNEWVEC (&dse_obstack, int,
2916 				       group->offset_map_size_p);
2917       group->process_globally = false;
2918       if (dump_file && (dump_flags & TDF_DETAILS))
2919 	{
2920 	  fprintf (dump_file, "group %d(%d+%d): ", i,
2921 		   (int)bitmap_count_bits (group->store2_n),
2922 		   (int)bitmap_count_bits (group->store2_p));
2923 	  bitmap_print (dump_file, group->store2_n, "n ", " ");
2924 	  bitmap_print (dump_file, group->store2_p, "p ", "\n");
2925 	}
2926     }
2927 }
2928 
2929 
2930 /* Init the offset tables.  */
2931 
2932 static bool
2933 dse_step2 (void)
2934 {
2935   unsigned int i;
2936   group_info *group;
2937   /* Position 0 is unused because 0 is used in the maps to mean
2938      unused.  */
2939   current_position = 1;
2940   FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
2941     {
2942       bitmap_iterator bi;
2943       unsigned int j;
2944 
2945       memset (group->offset_map_n, 0, sizeof (int) * group->offset_map_size_n);
2946       memset (group->offset_map_p, 0, sizeof (int) * group->offset_map_size_p);
2947       bitmap_clear (group->group_kill);
2948 
2949       EXECUTE_IF_SET_IN_BITMAP (group->store2_n, 0, j, bi)
2950 	{
2951 	  bitmap_set_bit (group->group_kill, current_position);
2952           if (bitmap_bit_p (group->escaped_n, j))
2953 	    bitmap_set_bit (kill_on_calls, current_position);
2954 	  group->offset_map_n[j] = current_position++;
2955 	  group->process_globally = true;
2956 	}
2957       EXECUTE_IF_SET_IN_BITMAP (group->store2_p, 0, j, bi)
2958 	{
2959 	  bitmap_set_bit (group->group_kill, current_position);
2960           if (bitmap_bit_p (group->escaped_p, j))
2961 	    bitmap_set_bit (kill_on_calls, current_position);
2962 	  group->offset_map_p[j] = current_position++;
2963 	  group->process_globally = true;
2964 	}
2965     }
2966   return current_position != 1;
2967 }
2968 
2969 
2970 
2971 /*----------------------------------------------------------------------------
2972   Third step.
2973 
2974   Build the bit vectors for the transfer functions.
2975 ----------------------------------------------------------------------------*/
2976 
2977 
2978 /* Look up the bitmap index for OFFSET in GROUP_INFO.  If it is not
2979    there, return 0.  */
2980 
2981 static int
2982 get_bitmap_index (group_info *group_info, HOST_WIDE_INT offset)
2983 {
2984   if (offset < 0)
2985     {
2986       HOST_WIDE_INT offset_p = -offset;
2987       if (offset_p >= group_info->offset_map_size_n)
2988 	return 0;
2989       return group_info->offset_map_n[offset_p];
2990     }
2991   else
2992     {
2993       if (offset >= group_info->offset_map_size_p)
2994 	return 0;
2995       return group_info->offset_map_p[offset];
2996     }
2997 }
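
/* For example (a sketch): after dse_step2 has filled the maps, a group
   with offset_map_n[4] == 7 makes get_bitmap_index (group, -4) return
   7, while any offset outside the mapped ranges yields 0, which
   callers such as scan_stores treat as "not tracked".  */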
2998 
2999 
3000 /* Process the STORE_INFOs into the bitmaps into GEN and KILL.  KILL
3001    may be NULL. */
3002 
3003 static void
3004 scan_stores (store_info *store_info, bitmap gen, bitmap kill)
3005 {
3006   while (store_info)
3007     {
3008       HOST_WIDE_INT i, offset, width;
3009       group_info *group_info
3010 	= rtx_group_vec[store_info->group_id];
3011       /* We can (conservatively) ignore stores whose bounds aren't known;
3012 	 they simply don't generate new global dse opportunities.  */
3013       if (group_info->process_globally
3014 	  && store_info->offset.is_constant (&offset)
3015 	  && store_info->width.is_constant (&width))
3016 	{
3017 	  HOST_WIDE_INT end = offset + width;
3018 	  for (i = offset; i < end; i++)
3019 	    {
3020 	      int index = get_bitmap_index (group_info, i);
3021 	      if (index != 0)
3022 		{
3023 		  bitmap_set_bit (gen, index);
3024 		  if (kill)
3025 		    bitmap_clear_bit (kill, index);
3026 		}
3027 	    }
3028 	}
3029       store_info = store_info->next;
3030     }
3031 }
3032 
3033 
3034 /* Process the READ_INFOs into the bitmaps into GEN and KILL.  KILL
3035    may be NULL.  */
3036 
3037 static void
3038 scan_reads (insn_info_t insn_info, bitmap gen, bitmap kill)
3039 {
3040   read_info_t read_info = insn_info->read_rec;
3041   int i;
3042   group_info *group;
3043 
3044   /* If this insn reads the frame, kill all the frame related stores.  */
3045   if (insn_info->frame_read)
3046     {
3047       FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
3048 	if (group->process_globally && group->frame_related)
3049 	  {
3050 	    if (kill)
3051 	      bitmap_ior_into (kill, group->group_kill);
3052 	    bitmap_and_compl_into (gen, group->group_kill);
3053 	  }
3054     }
3055   if (insn_info->non_frame_wild_read)
3056     {
3057       /* Kill all non-frame related stores.  Kill all stores of variables that
3058          escape.  */
3059       if (kill)
3060         bitmap_ior_into (kill, kill_on_calls);
3061       bitmap_and_compl_into (gen, kill_on_calls);
3062       FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
3063 	if (group->process_globally && !group->frame_related)
3064 	  {
3065 	    if (kill)
3066 	      bitmap_ior_into (kill, group->group_kill);
3067 	    bitmap_and_compl_into (gen, group->group_kill);
3068 	  }
3069     }
3070   while (read_info)
3071     {
3072       FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
3073 	{
3074 	  if (group->process_globally)
3075 	    {
3076 	      if (i == read_info->group_id)
3077 		{
3078 		  HOST_WIDE_INT offset, width;
3079 		  /* Reads with non-constant size kill all DSE opportunities
3080 		     in the group.  */
3081 		  if (!read_info->offset.is_constant (&offset)
3082 		      || !read_info->width.is_constant (&width)
3083 		      || !known_size_p (width))
3084 		    {
3085 		      /* Handle block mode reads.  */
3086 		      if (kill)
3087 			bitmap_ior_into (kill, group->group_kill);
3088 		      bitmap_and_compl_into (gen, group->group_kill);
3089 		    }
3090 		  else
3091 		    {
3092 		      /* The groups are the same, just process the
3093 			 offsets.  */
3094 		      HOST_WIDE_INT j;
3095 		      HOST_WIDE_INT end = offset + width;
3096 		      for (j = offset; j < end; j++)
3097 			{
3098 			  int index = get_bitmap_index (group, j);
3099 			  if (index != 0)
3100 			    {
3101 			      if (kill)
3102 				bitmap_set_bit (kill, index);
3103 			      bitmap_clear_bit (gen, index);
3104 			    }
3105 			}
3106 		    }
3107 		}
3108 	      else
3109 		{
3110 		  /* The groups are different, if the alias sets
3111 		     conflict, clear the entire group.  We only need
3112 		     to apply this test if the read_info is a cselib
3113 		     read.  Anything with a constant base cannot alias
3114 		     something else with a different constant
3115 		     base.  */
3116 		  if ((read_info->group_id < 0)
3117 		      && canon_true_dependence (group->base_mem,
3118 						GET_MODE (group->base_mem),
3119 						group->canon_base_addr,
3120 						read_info->mem, NULL_RTX))
3121 		    {
3122 		      if (kill)
3123 			bitmap_ior_into (kill, group->group_kill);
3124 		      bitmap_and_compl_into (gen, group->group_kill);
3125 		    }
3126 		}
3127 	    }
3128 	}
3129 
3130       read_info = read_info->next;
3131     }
3132 }
3133 
3134 
3135 /* Return the insn in BB_INFO before the first wild read or, if there
3136    are no wild reads in the block, the last insn.  */
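
/* For example (a sketch): in a block containing I1, I2 and I3 where I2
   does a wild read, the backwards walk returns I1, so the gen/kill
   scan in dse_step3_scan starts at I1 and never visits I2 or I3.  */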
3137 
3138 static insn_info_t
3139 find_insn_before_first_wild_read (bb_info_t bb_info)
3140 {
3141   insn_info_t insn_info = bb_info->last_insn;
3142   insn_info_t last_wild_read = NULL;
3143 
3144   while (insn_info)
3145     {
3146       if (insn_info->wild_read)
3147 	{
3148 	  last_wild_read = insn_info->prev_insn;
3149 	  /* Block starts with wild read.  */
3150 	  if (!last_wild_read)
3151 	    return NULL;
3152 	}
3153 
3154       insn_info = insn_info->prev_insn;
3155     }
3156 
3157   if (last_wild_read)
3158     return last_wild_read;
3159   else
3160     return bb_info->last_insn;
3161 }
3162 
3163 
3164 /* Scan the insns in the block of BB, starting at the insn returned
3165    by find_insn_before_first_wild_read and going to the top of the
3166    block, in order to build the gen and kill sets for the block.  The
3167    starting insn is either the last insn in the block or the insn just
3168    before the first wild read; in the latter case the remainder of the
3169    block can be skipped: anything that happens there is hidden by the
   wild read.  */
3170 
3171 static void
dse_step3_scan(basic_block bb)3172 dse_step3_scan (basic_block bb)
3173 {
3174   bb_info_t bb_info = bb_table[bb->index];
3175   insn_info_t insn_info;
3176 
3177   insn_info = find_insn_before_first_wild_read (bb_info);
3178 
3179   /* In the spill case or in the no_spill case if there is no wild
3180      read in the block, we will need a kill set.  */
3181   if (insn_info == bb_info->last_insn)
3182     {
3183       if (bb_info->kill)
3184 	bitmap_clear (bb_info->kill);
3185       else
3186 	bb_info->kill = BITMAP_ALLOC (&dse_bitmap_obstack);
3187     }
3188   else
3189     if (bb_info->kill)
3190       BITMAP_FREE (bb_info->kill);
3191 
3192   while (insn_info)
3193     {
3194       /* There may have been code deleted by the dce pass run before
3195 	 this phase.  */
3196       if (insn_info->insn && INSN_P (insn_info->insn))
3197 	{
3198 	  scan_stores (insn_info->store_rec, bb_info->gen, bb_info->kill);
3199 	  scan_reads (insn_info, bb_info->gen, bb_info->kill);
3200 	}
3201 
3202       insn_info = insn_info->prev_insn;
3203     }
3204 }
3205 
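/* Editorial worked example for the scan above (writing byte offsets
   for the bitmap indexes that get_bitmap_index assigns them).  Take a
   block whose insns, in program order, are

       i1: mem[fp+0..3] = a
       i2: b = mem[fp+4..7]
       i3: mem[fp+4..7] = c

   The loop visits i3, i2, i1.  i3's store puts positions {4..7} into
   GEN; i2's read moves {4..7} from GEN into KILL; i1's store adds
   {0..3} to GEN and clears those bits from KILL (a no-op here).  The
   result is GEN = {0..3}, KILL = {4..7}: seen from the top of the
   block, the first event on bytes 0..3 is a store (stores from
   predecessors are dead there) and the first event on bytes 4..7 is a
   read (those bytes are live into the block).  */
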
/* Set the gen set of the exit block, and also any block with no
   successors that does not have a wild read.  */

static void
dse_step3_exit_block_scan (bb_info_t bb_info)
{
  /* The gen set is all 0's for the exit block except for the
     frame_pointer_group.  */

  if (stores_off_frame_dead_at_return)
    {
      unsigned int i;
      group_info *group;

      FOR_EACH_VEC_ELT (rtx_group_vec, i, group)
	{
	  if (group->process_globally && group->frame_related)
	    bitmap_ior_into (bb_info->gen, group->group_kill);
	}
    }
}

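/* For instance, if the frame-pointer group covers bytes 0..15 (so its
   group_kill bits are {0..15}), the exit block's GEN becomes {0..15}:
   as far as the dataflow problem is concerned, every frame position
   is overwritten at function exit, so a frame store with no later
   read can be removed by the global pass.  Non-frame groups
   contribute nothing, since memory off the frame may outlive the
   return.  */
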
/* Find all of the blocks that are not backwards reachable from the
   exit block or any block with no successors (BB).  These are the
   infinite loops or infinite self loops.  Blocks that are backwards
   reachable get their bits cleared; the blocks of interest are the
   ones whose bits are still set in UNREACHABLE_BLOCKS afterwards.  */

static void
mark_reachable_blocks (sbitmap unreachable_blocks, basic_block bb)
{
  edge e;
  edge_iterator ei;

  if (bitmap_bit_p (unreachable_blocks, bb->index))
    {
      bitmap_clear_bit (unreachable_blocks, bb->index);
      FOR_EACH_EDGE (e, ei, bb->preds)
	{
	  mark_reachable_blocks (unreachable_blocks, e->src);
	}
    }
}

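/* A sketch of how the caller drives this (see dse_step3 below):

       bitmap_ones (unreachable_blocks);
       FOR_ALL_BB_FN (bb, cfun)
	 if (EDGE_COUNT (bb->succs) == 0)
	   mark_reachable_blocks (unreachable_blocks, bb);

   Every block that can reach some exit has its bit cleared; the bits
   that survive belong to infinite (self) loops, whose OUT sets must
   be primed with all ones.  */
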
/* Build the transfer functions for the function.  */

static void
dse_step3 ()
{
  basic_block bb;
  sbitmap_iterator sbi;
  bitmap all_ones = NULL;
  unsigned int i;

  auto_sbitmap unreachable_blocks (last_basic_block_for_fn (cfun));
  bitmap_ones (unreachable_blocks);

  FOR_ALL_BB_FN (bb, cfun)
    {
      bb_info_t bb_info = bb_table[bb->index];
      if (bb_info->gen)
	bitmap_clear (bb_info->gen);
      else
	bb_info->gen = BITMAP_ALLOC (&dse_bitmap_obstack);

      if (bb->index == ENTRY_BLOCK)
	;
      else if (bb->index == EXIT_BLOCK)
	dse_step3_exit_block_scan (bb_info);
      else
	dse_step3_scan (bb);
      if (EDGE_COUNT (bb->succs) == 0)
	mark_reachable_blocks (unreachable_blocks, bb);

      /* If this is the second time dataflow is run, delete the old
	 sets.  */
      if (bb_info->in)
	BITMAP_FREE (bb_info->in);
      if (bb_info->out)
	BITMAP_FREE (bb_info->out);
    }

  /* For any block in an infinite loop, we must initialize the out set
     to all ones.  This could be expensive, but almost never occurs in
     practice.  However, it is common in regression tests.  */
  EXECUTE_IF_SET_IN_BITMAP (unreachable_blocks, 0, i, sbi)
    {
      if (bitmap_bit_p (all_blocks, i))
	{
	  bb_info_t bb_info = bb_table[i];
	  if (!all_ones)
	    {
	      unsigned int j;
	      group_info *group;

	      all_ones = BITMAP_ALLOC (&dse_bitmap_obstack);
	      FOR_EACH_VEC_ELT (rtx_group_vec, j, group)
		bitmap_ior_into (all_ones, group->group_kill);
	    }
	  if (!bb_info->out)
	    {
	      bb_info->out = BITMAP_ALLOC (&dse_bitmap_obstack);
	      bitmap_copy (bb_info->out, all_ones);
	    }
	}
    }

  if (all_ones)
    BITMAP_FREE (all_ones);
}

/*----------------------------------------------------------------------------
   Fourth step.

   Solve the bitvector equations.
----------------------------------------------------------------------------*/

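/* Editorial summary of the equations being solved.  Each set bit
   stands for one tracked byte position (a group/offset pair, per
   get_bitmap_index); a missing set is treated as all ones.  For each
   block B:

       IN(B)  = GEN(B) | (OUT(B) & ~KILL(B))
       OUT(B) = intersection over successors S of IN(S)

   with OUT of blocks having no successors seeded from the exit
   block's GEN set.  A set bit means "this byte position is
   overwritten before any read on every path from here", which is
   exactly the condition for a store to that position to be dead.  */
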
/* Confluence function for blocks with no successors.  Create an out
   set from the gen set of the exit block.  This block logically has
   the exit block as a successor.  */

static void
dse_confluence_0 (basic_block bb)
{
  bb_info_t bb_info = bb_table[bb->index];

  if (bb->index == EXIT_BLOCK)
    return;

  if (!bb_info->out)
    {
      bb_info->out = BITMAP_ALLOC (&dse_bitmap_obstack);
      bitmap_copy (bb_info->out, bb_table[EXIT_BLOCK]->gen);
    }
}

/* Propagate the information from the in set of the dest of E to the
   out set of the src of E.  If the various in or out sets are not
   there, that means they are all ones.  */

static bool
dse_confluence_n (edge e)
{
  bb_info_t src_info = bb_table[e->src->index];
  bb_info_t dest_info = bb_table[e->dest->index];

  if (dest_info->in)
    {
      if (src_info->out)
	bitmap_and_into (src_info->out, dest_info->in);
      else
	{
	  src_info->out = BITMAP_ALLOC (&dse_bitmap_obstack);
	  bitmap_copy (src_info->out, dest_info->in);
	}
    }
  return true;
}

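/* Illustrative: if block B has successors S1 with IN = {1, 2} and S2
   with IN = {2, 3}, the two confluence calls leave OUT(B) = {2}; only
   positions dead along every outgoing edge stay dead.  A successor
   whose IN set has not been computed yet is skipped, which (per the
   all-ones convention above) lets OUT(B) shrink on a later
   iteration.  */
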
/* Propagate the info from the out to the in set of BB_INDEX's basic
   block.  There are three cases:

   1) The block has no kill set.  In this case the kill set is all
   ones.  It does not matter what the out set of the block is; none of
   the info can reach the top.  The only thing that reaches the top is
   the gen set and we just copy the set.

   2) There is a kill set but no out set and bb has successors.  In
   this case we just return.  Eventually an out set will be created and
   it is better to wait than to create a set of ones.

   3) There is both a kill and out set.  We apply the obvious transfer
   function.  */

static bool
dse_transfer_function (int bb_index)
{
  bb_info_t bb_info = bb_table[bb_index];

  if (bb_info->kill)
    {
      if (bb_info->out)
	{
	  /* Case 3 above.  */
	  if (bb_info->in)
	    return bitmap_ior_and_compl (bb_info->in, bb_info->gen,
					 bb_info->out, bb_info->kill);
	  else
	    {
	      bb_info->in = BITMAP_ALLOC (&dse_bitmap_obstack);
	      bitmap_ior_and_compl (bb_info->in, bb_info->gen,
				    bb_info->out, bb_info->kill);
	      return true;
	    }
	}
      else
	/* Case 2 above.  */
	return false;
    }
  else
    {
      /* Case 1 above.  If there is already an in set, nothing
	 happens.  */
      if (bb_info->in)
	return false;
      else
	{
	  bb_info->in = BITMAP_ALLOC (&dse_bitmap_obstack);
	  bitmap_copy (bb_info->in, bb_info->gen);
	  return true;
	}
    }
}

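/* Case 3 boils down to the single bitmap.h primitive used above
   (shown here as an editorial sketch, on the obstack the pass
   already owns):

       bitmap in = BITMAP_ALLOC (&dse_bitmap_obstack);
       bool changed = bitmap_ior_and_compl (in, gen, out, kill);

   which computes in = gen | (out & ~kill) and reports whether IN
   changed: a position is dead at the top of the block if the block
   itself overwrites it (GEN), or it is dead below the block (OUT) and
   the block never reads it (~KILL).  */
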
/* Solve the dataflow equations.  */

static void
dse_step4 (void)
{
  df_simple_dataflow (DF_BACKWARD, NULL, dse_confluence_0,
		      dse_confluence_n, dse_transfer_function,
		      all_blocks, df_get_postorder (DF_BACKWARD),
		      df_get_n_blocks (DF_BACKWARD));
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      basic_block bb;

      fprintf (dump_file, "\n\n*** Global dataflow info after analysis.\n");
      FOR_ALL_BB_FN (bb, cfun)
	{
	  bb_info_t bb_info = bb_table[bb->index];

	  df_print_bb_index (bb, dump_file);
	  if (bb_info->in)
	    bitmap_print (dump_file, bb_info->in, "  in:   ", "\n");
	  else
	    fprintf (dump_file, "  in:   *MISSING*\n");
	  if (bb_info->gen)
	    bitmap_print (dump_file, bb_info->gen, "  gen:  ", "\n");
	  else
	    fprintf (dump_file, "  gen:  *MISSING*\n");
	  if (bb_info->kill)
	    bitmap_print (dump_file, bb_info->kill, "  kill: ", "\n");
	  else
	    fprintf (dump_file, "  kill: *MISSING*\n");
	  if (bb_info->out)
	    bitmap_print (dump_file, bb_info->out, "  out:  ", "\n");
	  else
	    fprintf (dump_file, "  out:  *MISSING*\n\n");
	}
    }
}

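/* Once the solver converges, OUT(B) holds the byte positions that are
   dead at the bottom of B along every path; these are exactly the V
   sets consumed by the fifth step below.  In the dump above, a
   *MISSING* set is one where the all-ones convention still
   applies.  */
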
/*----------------------------------------------------------------------------
   Fifth step.

   Delete the stores that can only be deleted using the global information.
----------------------------------------------------------------------------*/

static void
dse_step5 (void)
{
  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    {
      bb_info_t bb_info = bb_table[bb->index];
      insn_info_t insn_info = bb_info->last_insn;
      bitmap v = bb_info->out;

      while (insn_info)
	{
	  bool deleted = false;
	  if (dump_file && insn_info->insn)
	    {
	      fprintf (dump_file, "starting to process insn %d\n",
		       INSN_UID (insn_info->insn));
	      bitmap_print (dump_file, v, "  v:  ", "\n");
	    }

	  /* There may have been code deleted by the dce pass run before
	     this phase.  */
	  if (insn_info->insn
	      && INSN_P (insn_info->insn)
	      && (!insn_info->cannot_delete)
	      && (!bitmap_empty_p (v)))
	    {
	      store_info *store_info = insn_info->store_rec;

	      /* Try to delete the current insn.  */
	      deleted = true;

	      /* Skip the clobbers.  */
	      while (!store_info->is_set)
		store_info = store_info->next;

	      HOST_WIDE_INT i, offset, width;
	      group_info *group_info = rtx_group_vec[store_info->group_id];

	      if (!store_info->offset.is_constant (&offset)
		  || !store_info->width.is_constant (&width))
		deleted = false;
	      else
		{
		  HOST_WIDE_INT end = offset + width;
		  for (i = offset; i < end; i++)
		    {
		      int index = get_bitmap_index (group_info, i);

		      if (dump_file && (dump_flags & TDF_DETAILS))
			fprintf (dump_file, "i = %d, index = %d\n",
				 (int) i, index);
		      if (index == 0 || !bitmap_bit_p (v, index))
			{
			  if (dump_file && (dump_flags & TDF_DETAILS))
			    fprintf (dump_file, "failing at i = %d\n",
				     (int) i);
			  deleted = false;
			  break;
			}
		    }
		}
	      if (deleted)
		{
		  if (dbg_cnt (dse)
		      && check_for_inc_dec_1 (insn_info))
		    {
		      delete_insn (insn_info->insn);
		      insn_info->insn = NULL;
		      globally_deleted++;
		    }
		}
	    }
	  /* We do not want to process the local info if the insn was
	     deleted.  For instance, if the insn did a wild read, we
	     no longer need to trash the info.  */
	  if (insn_info->insn
	      && INSN_P (insn_info->insn)
	      && (!deleted))
	    {
	      scan_stores (insn_info->store_rec, v, NULL);
	      if (insn_info->wild_read)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "wild read\n");
		  bitmap_clear (v);
		}
	      else if (insn_info->read_rec
		       || insn_info->non_frame_wild_read
		       || insn_info->frame_read)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    {
		      if (!insn_info->non_frame_wild_read
			  && !insn_info->frame_read)
			fprintf (dump_file, "regular read\n");
		      if (insn_info->non_frame_wild_read)
			fprintf (dump_file, "non-frame wild read\n");
		      if (insn_info->frame_read)
			fprintf (dump_file, "frame read\n");
		    }
		  scan_reads (insn_info, v, NULL);
		}
	    }

	  insn_info = insn_info->prev_insn;
	}
    }
}

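/* Editorial example of the deletion test above: for a store to bytes
   8..11 of a group, the loop is effectively

       for (i = 8; i < 12; i++)
	 {
	   int index = get_bitmap_index (group_info, i);
	   if (index == 0 || !bitmap_bit_p (v, index))
	     {
	       deleted = false;	  a live or untracked byte
	       break;		  keeps the whole store
	     }
	 }

   i.e. every byte the store writes must already be known dead in V
   before the insn can go.  */
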
/*----------------------------------------------------------------------------
   Sixth step.

   Delete stores made redundant by earlier stores (which store the same
   value) that couldn't be eliminated.
----------------------------------------------------------------------------*/

static void
dse_step6 (void)
{
  basic_block bb;

  FOR_ALL_BB_FN (bb, cfun)
    {
      bb_info_t bb_info = bb_table[bb->index];
      insn_info_t insn_info = bb_info->last_insn;

      while (insn_info)
	{
	  /* There may have been code deleted by the dce pass run before
	     this phase.  */
	  if (insn_info->insn
	      && INSN_P (insn_info->insn)
	      && !insn_info->cannot_delete)
	    {
	      store_info *s_info = insn_info->store_rec;

	      while (s_info && !s_info->is_set)
		s_info = s_info->next;
	      if (s_info
		  && s_info->redundant_reason
		  && s_info->redundant_reason->insn
		  && INSN_P (s_info->redundant_reason->insn))
		{
		  rtx_insn *rinsn = s_info->redundant_reason->insn;
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Locally deleting insn %d "
					"because insn %d stores the "
					"same value and couldn't be "
					"eliminated\n",
			     INSN_UID (insn_info->insn),
			     INSN_UID (rinsn));
		  delete_dead_store_insn (insn_info);
		}
	    }
	  insn_info = insn_info->prev_insn;
	}
    }
}

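/* For example, given

       i1: mem[fp+0..3] = x	(could not be deleted, e.g. because a
				 partial read keeps it alive)
       i2: mem[fp+0..3] = x	(recorded with redundant_reason = i1)

   this step removes i2: the bytes it writes already hold the right
   value because i1 survives.  Note the direction: the *later* store
   is the one deleted; the earlier one is the reason.  */
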
/*----------------------------------------------------------------------------
   Seventh step.

   Destroy everything left standing.
----------------------------------------------------------------------------*/

static void
dse_step7 (void)
{
  bitmap_obstack_release (&dse_bitmap_obstack);
  obstack_free (&dse_obstack, NULL);

  end_alias_analysis ();
  free (bb_table);
  delete rtx_group_table;
  rtx_group_table = NULL;
  rtx_group_vec.release ();
  BITMAP_FREE (all_blocks);
  BITMAP_FREE (scratch);

  rtx_store_info_pool.release ();
  read_info_type_pool.release ();
  insn_info_type_pool.release ();
  dse_bb_info_type_pool.release ();
  group_info_pool.release ();
  deferred_change_pool.release ();
}

/* -------------------------------------------------------------------------
   DSE
   ------------------------------------------------------------------------- */

/* Callback for running pass_rtl_dse.  */

static unsigned int
rest_of_handle_dse (void)
{
  df_set_flags (DF_DEFER_INSN_RESCAN);

  /* Need the notes since we must track live hardregs in the forwards
     direction.  */
  df_note_add_problem ();
  df_analyze ();

  dse_step0 ();
  dse_step1 ();
  dse_step2_init ();
  if (dse_step2 ())
    {
      df_set_flags (DF_LR_RUN_DCE);
      df_analyze ();
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "doing global processing\n");
      dse_step3 ();
      dse_step4 ();
      dse_step5 ();
    }

  dse_step6 ();
  dse_step7 ();

  if (dump_file)
    fprintf (dump_file, "dse: local deletions = %d, global deletions = %d\n",
	     locally_deleted, globally_deleted);

  /* DSE can eliminate potentially-trapping MEMs.
     Remove any EH edges associated with them.  */
  if ((locally_deleted || globally_deleted)
      && cfun->can_throw_non_call_exceptions
      && purge_all_dead_edges ())
    {
      free_dominance_info (CDI_DOMINATORS);
      cleanup_cfg (0);
    }

  return 0;
}

namespace {

const pass_data pass_data_rtl_dse1 =
{
  RTL_PASS, /* type */
  "dse1", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_DSE1, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_rtl_dse1 : public rtl_opt_pass
{
public:
  pass_rtl_dse1 (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_rtl_dse1, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return optimize > 0 && flag_dse && dbg_cnt (dse1);
    }

  virtual unsigned int execute (function *) { return rest_of_handle_dse (); }

}; // class pass_rtl_dse1

} // anon namespace

rtl_opt_pass *
make_pass_rtl_dse1 (gcc::context *ctxt)
{
  return new pass_rtl_dse1 (ctxt);
}

namespace {

const pass_data pass_data_rtl_dse2 =
{
  RTL_PASS, /* type */
  "dse2", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_DSE2, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_rtl_dse2 : public rtl_opt_pass
{
public:
  pass_rtl_dse2 (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_rtl_dse2, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return optimize > 0 && flag_dse && dbg_cnt (dse2);
    }

  virtual unsigned int execute (function *) { return rest_of_handle_dse (); }

}; // class pass_rtl_dse2

} // anon namespace

rtl_opt_pass *
make_pass_rtl_dse2 (gcc::context *ctxt)
{
  return new pass_rtl_dse2 (ctxt);
}