xref: /dragonfly/contrib/gcc-4.7/gcc/gcse.c (revision ec21d9fb)
1 /* Partial redundancy elimination / Hoisting for RTL.
2    Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
3    2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 /* TODO
22    - reordering of memory allocation and freeing to be more space efficient
23    - do rough calc of how many regs are needed in each block, and a rough
24      calc of how many regs are available in each class and use that to
25      throttle back the code in cases where RTX_COST is minimal.
26 */
27 
28 /* References searched while implementing this.
29 
30    Compilers Principles, Techniques and Tools
31    Aho, Sethi, Ullman
32    Addison-Wesley, 1988
33 
34    Global Optimization by Suppression of Partial Redundancies
35    E. Morel, C. Renvoise
36    communications of the acm, Vol. 22, Num. 2, Feb. 1979
37 
38    A Portable Machine-Independent Global Optimizer - Design and Measurements
39    Frederick Chow
40    Stanford Ph.D. thesis, Dec. 1983
41 
42    A Fast Algorithm for Code Movement Optimization
43    D.M. Dhamdhere
44    SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
45 
46    A Solution to a Problem with Morel and Renvoise's
47    Global Optimization by Suppression of Partial Redundancies
48    K-H Drechsler, M.P. Stadel
49    ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
50 
51    Practical Adaptation of the Global Optimization
52    Algorithm of Morel and Renvoise
53    D.M. Dhamdhere
54    ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
55 
56    Efficiently Computing Static Single Assignment Form and the Control
57    Dependence Graph
58    R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
59    ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
60 
61    Lazy Code Motion
62    J. Knoop, O. Ruthing, B. Steffen
63    ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
64 
65    What's In a Region?  Or Computing Control Dependence Regions in Near-Linear
66    Time for Reducible Flow Control
67    Thomas Ball
68    ACM Letters on Programming Languages and Systems,
69    Vol. 2, Num. 1-4, Mar-Dec 1993
70 
71    An Efficient Representation for Sparse Sets
72    Preston Briggs, Linda Torczon
73    ACM Letters on Programming Languages and Systems,
74    Vol. 2, Num. 1-4, Mar-Dec 1993
75 
76    A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
77    K-H Drechsler, M.P. Stadel
78    ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
79 
80    Partial Dead Code Elimination
81    J. Knoop, O. Ruthing, B. Steffen
82    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
83 
84    Effective Partial Redundancy Elimination
85    P. Briggs, K.D. Cooper
86    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
87 
88    The Program Structure Tree: Computing Control Regions in Linear Time
89    R. Johnson, D. Pearson, K. Pingali
90    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
91 
92    Optimal Code Motion: Theory and Practice
93    J. Knoop, O. Ruthing, B. Steffen
94    ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
95 
96    The power of assignment motion
97    J. Knoop, O. Ruthing, B. Steffen
98    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
99 
100    Global code motion / global value numbering
101    C. Click
102    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
103 
104    Value Driven Redundancy Elimination
105    L.T. Simpson
106    Rice University Ph.D. thesis, Apr. 1996
107 
108    Value Numbering
109    L.T. Simpson
110    Massively Scalar Compiler Project, Rice University, Sep. 1996
111 
112    High Performance Compilers for Parallel Computing
113    Michael Wolfe
114    Addison-Wesley, 1996
115 
116    Advanced Compiler Design and Implementation
117    Steven Muchnick
118    Morgan Kaufmann, 1997
119 
120    Building an Optimizing Compiler
121    Robert Morgan
122    Digital Press, 1998
123 
124    People wishing to speed up the code here should read:
125      Elimination Algorithms for Data Flow Analysis
126      B.G. Ryder, M.C. Paull
127      ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
128 
129      How to Analyze Large Programs Efficiently and Informatively
130      D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
131      ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
132 
133    People wishing to do something different can find various possibilities
134    in the above papers and elsewhere.
135 */
136 
137 #include "config.h"
138 #include "system.h"
139 #include "coretypes.h"
140 #include "tm.h"
141 #include "diagnostic-core.h"
142 #include "toplev.h"
143 
144 #include "rtl.h"
145 #include "tree.h"
146 #include "tm_p.h"
147 #include "regs.h"
148 #include "hard-reg-set.h"
149 #include "flags.h"
150 #include "insn-config.h"
151 #include "recog.h"
152 #include "basic-block.h"
153 #include "output.h"
154 #include "function.h"
155 #include "expr.h"
156 #include "except.h"
157 #include "ggc.h"
158 #include "params.h"
159 #include "cselib.h"
160 #include "intl.h"
161 #include "obstack.h"
162 #include "timevar.h"
163 #include "tree-pass.h"
164 #include "hashtab.h"
165 #include "df.h"
166 #include "dbgcnt.h"
167 #include "target.h"
168 #include "gcse.h"
169 
170 /* We support GCSE via Partial Redundancy Elimination.  PRE optimizations
171    are a superset of those done by classic GCSE.
172 
173    Two passes of copy/constant propagation are done around PRE or hoisting
174    because the first one enables more GCSE and the second one helps to clean
175    up the copies that PRE and HOIST create.  This is needed more for PRE than
176    for HOIST because code hoisting will try to use an existing register
177    containing the common subexpression rather than create a new one.  This is
178    harder to do for PRE because of the code motion (which HOIST doesn't do).
179 
180    Expressions we are interested in GCSE-ing are of the form
181    (set (pseudo-reg) (expression)).
182    Function want_to_gcse_p says what these are.
183 
184    In addition, expressions in REG_EQUAL notes are candidates for GCSE-ing.
185    This allows PRE to hoist expressions that are expressed in multiple insns,
186    such as complex address calculations (e.g. for PIC code, or loads with a
187    high part and a low part).
188 
189    PRE handles moving invariant expressions out of loops (by treating them as
190    partially redundant).
191 
192    **********************
193 
194    We used to support multiple passes but there are diminishing returns in
195    doing so.  The first pass usually makes 90% of the changes that are doable.
196    A second pass can make a few more changes made possible by the first pass.
197    Experiments show any further passes don't make enough changes to justify
198    the expense.
199 
200    A study of spec92 using an unlimited number of passes:
201    [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
202    [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
203    [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
204 
205    It was found doing copy propagation between each pass enables further
206    substitutions.
207 
208    This study was done before expressions in REG_EQUAL notes were added as
209    candidate expressions for optimization, and before the GIMPLE optimizers
210    were added.  Probably, multiple passes is even less efficient now than
211    at the time when the study was conducted.
212 
213    PRE is quite expensive in complicated functions because the DFA can take
214    a while to converge.  Hence we only perform one pass.
215 
216    **********************
217 
218    The steps for PRE are:
219 
220    1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
221 
222    2) Perform the data flow analysis for PRE.
223 
224    3) Delete the redundant instructions
225 
226    4) Insert the required copies [if any] that make the partially
227       redundant instructions fully redundant.
228 
229    5) For other reaching expressions, insert an instruction to copy the value
230       to a newly created pseudo that will reach the redundant instruction.
231 
232    The deletion is done first so that when we do insertions we
233    know which pseudo reg to use.
234 
235    Various papers have argued that PRE DFA is expensive (O(n^2)) and others
236    argue it is not.  The number of iterations for the algorithm to converge
237    is typically 2-4 so I don't view it as that expensive (relatively speaking).
238 
239    PRE GCSE depends heavily on the second CPROP pass to clean up the copies
240    we create.  To make an expression reach the place where it's redundant,
241    the result of the expression is copied to a new register, and the redundant
242    expression is deleted by replacing it with this new register.  Classic GCSE
243    doesn't have this problem as much as it computes the reaching defs of
244    each register in each block and thus can try to use an existing
245    register.  */
246 
247 /* GCSE global vars.  */
248 
249 struct target_gcse default_target_gcse;
250 #if SWITCHABLE_TARGET
251 struct target_gcse *this_target_gcse = &default_target_gcse;
252 #endif
253 
254 /* Set to non-zero if CSE should run after all GCSE optimizations are done.  */
255 int flag_rerun_cse_after_global_opts;
256 
257 /* An obstack for our working variables.  */
258 static struct obstack gcse_obstack;
259 
260 struct reg_use {rtx reg_rtx; };
261 
262 /* Hash table of expressions.  */
263 
264 struct expr
265 {
266   /* The expression.  */
267   rtx expr;
268   /* Index in the available expression bitmaps.  */
269   int bitmap_index;
270   /* Next entry with the same hash.  */
271   struct expr *next_same_hash;
272   /* List of anticipatable occurrences in basic blocks in the function.
273      An "anticipatable occurrence" is one that is the first occurrence in the
274      basic block, the operands are not modified in the basic block prior
275      to the occurrence and the output is not used between the start of
276      the block and the occurrence.  */
277   struct occr *antic_occr;
278   /* List of available occurrence in basic blocks in the function.
279      An "available occurrence" is one that is the last occurrence in the
280      basic block and the operands are not modified by following statements in
281      the basic block [including this insn].  */
282   struct occr *avail_occr;
283   /* Non-null if the computation is PRE redundant.
284      The value is the newly created pseudo-reg to record a copy of the
285      expression in all the places that reach the redundant copy.  */
286   rtx reaching_reg;
287   /* Maximum distance in instructions this expression can travel.
288      We avoid moving simple expressions for more than a few instructions
289      to keep register pressure under control.
290      A value of "0" removes restrictions on how far the expression can
291      travel.  */
292   int max_distance;
293 };
294 
295 /* Occurrence of an expression.
296    There is one per basic block.  If a pattern appears more than once the
297    last appearance is used [or first for anticipatable expressions].  */
298 
299 struct occr
300 {
301   /* Next occurrence of this expression.  */
302   struct occr *next;
303   /* The insn that computes the expression.  */
304   rtx insn;
305   /* Nonzero if this [anticipatable] occurrence has been deleted.  */
306   char deleted_p;
307   /* Nonzero if this [available] occurrence has been copied to
308      reaching_reg.  */
309   /* ??? This is mutually exclusive with deleted_p, so they could share
310      the same byte.  */
311   char copied_p;
312 };
313 
314 typedef struct occr *occr_t;
315 DEF_VEC_P (occr_t);
316 DEF_VEC_ALLOC_P (occr_t, heap);
317 
318 /* Expression hash tables.
319    Each hash table is an array of buckets.
320    ??? It is known that if it were an array of entries, structure elements
321    `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
322    not clear whether in the final analysis a sufficient amount of memory would
323    be saved as the size of the available expression bitmaps would be larger
324    [one could build a mapping table without holes afterwards though].
325    Someday I'll perform the computation and figure it out.  */
326 
327 struct hash_table_d
328 {
329   /* The table itself.
330      This is an array of `expr_hash_table_size' elements.  */
331   struct expr **table;
332 
333   /* Size of the hash table, in elements.  */
334   unsigned int size;
335 
336   /* Number of hash table elements.  */
337   unsigned int n_elems;
338 };
339 
340 /* Expression hash table.  */
341 static struct hash_table_d expr_hash_table;
342 
343 /* This is a list of expressions which are MEMs and will be used by load
344    or store motion.
345    Load motion tracks MEMs which aren't killed by anything except itself,
346    i.e. loads and stores to a single location.
347    We can then allow movement of these MEM refs with a little special
348    allowance. (all stores copy the same value to the reaching reg used
349    for the loads).  This means all values used to store into memory must have
350    no side effects so we can re-issue the setter value.  */
351 
352 struct ls_expr
353 {
354   struct expr * expr;		/* Gcse expression reference for LM.  */
355   rtx pattern;			/* Pattern of this mem.  */
356   rtx pattern_regs;		/* List of registers mentioned by the mem.  */
357   rtx loads;			/* INSN list of loads seen.  */
358   rtx stores;			/* INSN list of stores seen.  */
359   struct ls_expr * next;	/* Next in the list.  */
360   int invalid;			/* Invalid for some reason.  */
361   int index;			/* If it maps to a bitmap index.  */
362   unsigned int hash_index;	/* Index when in a hash table.  */
363   rtx reaching_reg;		/* Register to use when re-writing.  */
364 };
365 
366 /* Head of the list of load/store memory refs.  */
367 static struct ls_expr * pre_ldst_mems = NULL;
368 
369 /* Hashtable for the load/store memory refs.  */
370 static htab_t pre_ldst_table = NULL;
371 
372 /* Bitmap containing one bit for each register in the program.
373    Used when performing GCSE to track which registers have been set since
374    the start of the basic block.  */
375 static regset reg_set_bitmap;
376 
377 /* Array, indexed by basic block number for a list of insns which modify
378    memory within that block.  */
379 static VEC (rtx,heap) **modify_mem_list;
380 static bitmap modify_mem_list_set;
381 
382 typedef struct modify_pair_s
383 {
384   rtx dest;			/* A MEM.  */
385   rtx dest_addr;		/* The canonical address of `dest'.  */
386 } modify_pair;
387 
388 DEF_VEC_O(modify_pair);
389 DEF_VEC_ALLOC_O(modify_pair,heap);
390 
391 /* This array parallels modify_mem_list, except that it stores MEMs
392    being set and their canonicalized memory addresses.  */
393 static VEC (modify_pair,heap) **canon_modify_mem_list;
394 
395 /* Bitmap indexed by block numbers to record which blocks contain
396    function calls.  */
397 static bitmap blocks_with_calls;
398 
399 /* Various variables for statistics gathering.  */
400 
401 /* Memory used in a pass.
402    This isn't intended to be absolutely precise.  Its intent is only
403    to keep an eye on memory usage.  */
404 static int bytes_used;
405 
406 /* GCSE substitutions made.  */
407 static int gcse_subst_count;
408 /* Number of copy instructions created.  */
409 static int gcse_create_count;
410 
411 /* Doing code hoisting.  */
412 static bool doing_code_hoisting_p = false;
413 
414 /* For available exprs */
415 static sbitmap *ae_kill;
416 
417 static void compute_can_copy (void);
418 static void *gmalloc (size_t) ATTRIBUTE_MALLOC;
419 static void *gcalloc (size_t, size_t) ATTRIBUTE_MALLOC;
420 static void *gcse_alloc (unsigned long);
421 static void alloc_gcse_mem (void);
422 static void free_gcse_mem (void);
423 static void hash_scan_insn (rtx, struct hash_table_d *);
424 static void hash_scan_set (rtx, rtx, struct hash_table_d *);
425 static void hash_scan_clobber (rtx, rtx, struct hash_table_d *);
426 static void hash_scan_call (rtx, rtx, struct hash_table_d *);
427 static int want_to_gcse_p (rtx, int *);
428 static int oprs_unchanged_p (const_rtx, const_rtx, int);
429 static int oprs_anticipatable_p (const_rtx, const_rtx);
430 static int oprs_available_p (const_rtx, const_rtx);
431 static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int, int,
432 				  struct hash_table_d *);
433 static unsigned int hash_expr (const_rtx, enum machine_mode, int *, int);
434 static int expr_equiv_p (const_rtx, const_rtx);
435 static void record_last_reg_set_info (rtx, int);
436 static void record_last_mem_set_info (rtx);
437 static void record_last_set_info (rtx, const_rtx, void *);
438 static void compute_hash_table (struct hash_table_d *);
439 static void alloc_hash_table (struct hash_table_d *);
440 static void free_hash_table (struct hash_table_d *);
441 static void compute_hash_table_work (struct hash_table_d *);
442 static void dump_hash_table (FILE *, const char *, struct hash_table_d *);
443 static void compute_transp (const_rtx, int, sbitmap *);
444 static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *,
445 				      struct hash_table_d *);
446 static void mems_conflict_for_gcse_p (rtx, const_rtx, void *);
447 static int load_killed_in_block_p (const_basic_block, int, const_rtx, int);
448 static void canon_list_insert (rtx, const_rtx, void *);
449 static void alloc_pre_mem (int, int);
450 static void free_pre_mem (void);
451 static struct edge_list *compute_pre_data (void);
452 static int pre_expr_reaches_here_p (basic_block, struct expr *,
453 				    basic_block);
454 static void insert_insn_end_basic_block (struct expr *, basic_block);
455 static void pre_insert_copy_insn (struct expr *, rtx);
456 static void pre_insert_copies (void);
457 static int pre_delete (void);
458 static int pre_gcse (struct edge_list *);
459 static int one_pre_gcse_pass (void);
460 static void add_label_notes (rtx, rtx);
461 static void alloc_code_hoist_mem (int, int);
462 static void free_code_hoist_mem (void);
463 static void compute_code_hoist_vbeinout (void);
464 static void compute_code_hoist_data (void);
465 static int hoist_expr_reaches_here_p (basic_block, int, basic_block, char *,
466 				      int, int *);
467 static int hoist_code (void);
468 static int one_code_hoisting_pass (void);
469 static rtx process_insert_insn (struct expr *);
470 static int pre_edge_insert (struct edge_list *, struct expr **);
471 static int pre_expr_reaches_here_p_work (basic_block, struct expr *,
472 					 basic_block, char *);
473 static struct ls_expr * ldst_entry (rtx);
474 static void free_ldst_entry (struct ls_expr *);
475 static void free_ld_motion_mems (void);
476 static void print_ldst_list (FILE *);
477 static struct ls_expr * find_rtx_in_ldst (rtx);
478 static int simple_mem (const_rtx);
479 static void invalidate_any_buried_refs (rtx);
480 static void compute_ld_motion_mems (void);
481 static void trim_ld_motion_mems (void);
482 static void update_ld_motion_stores (struct expr *);
483 static void clear_modify_mem_tables (void);
484 static void free_modify_mem_tables (void);
485 static rtx gcse_emit_move_after (rtx, rtx, rtx);
486 static bool is_too_expensive (const char *);
487 
488 #define GNEW(T)			((T *) gmalloc (sizeof (T)))
489 #define GCNEW(T)		((T *) gcalloc (1, sizeof (T)))
490 
491 #define GNEWVEC(T, N)		((T *) gmalloc (sizeof (T) * (N)))
492 #define GCNEWVEC(T, N)		((T *) gcalloc ((N), sizeof (T)))
493 
494 #define GNEWVAR(T, S)		((T *) gmalloc ((S)))
495 #define GCNEWVAR(T, S)		((T *) gcalloc (1, (S)))
496 
497 #define GOBNEW(T)		((T *) gcse_alloc (sizeof (T)))
498 #define GOBNEWVAR(T, S)		((T *) gcse_alloc ((S)))
499 
500 /* Misc. utilities.  */
501 
502 #define can_copy \
503   (this_target_gcse->x_can_copy)
504 #define can_copy_init_p \
505   (this_target_gcse->x_can_copy_init_p)
506 
507 /* Compute which modes support reg/reg copy operations.  */
508 
509 static void
510 compute_can_copy (void)
511 {
512   int i;
513 #ifndef AVOID_CCMODE_COPIES
514   rtx reg, insn;
515 #endif
516   memset (can_copy, 0, NUM_MACHINE_MODES);
517 
518   start_sequence ();
519   for (i = 0; i < NUM_MACHINE_MODES; i++)
520     if (GET_MODE_CLASS (i) == MODE_CC)
521       {
522 #ifdef AVOID_CCMODE_COPIES
523 	can_copy[i] = 0;
524 #else
525 	reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
526 	insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
527 	if (recog (PATTERN (insn), insn, NULL) >= 0)
528 	  can_copy[i] = 1;
529 #endif
530       }
531     else
532       can_copy[i] = 1;
533 
534   end_sequence ();
535 }
536 
537 /* Returns whether the mode supports reg/reg copy operations.  */
538 
539 bool
540 can_copy_p (enum machine_mode mode)
541 {
542   if (! can_copy_init_p)
543     {
544       compute_can_copy ();
545       can_copy_init_p = true;
546     }
547 
548   return can_copy[mode] != 0;
549 }
550 
551 /* Cover function to xmalloc to record bytes allocated.  */
552 
553 static void *
554 gmalloc (size_t size)
555 {
556   bytes_used += size;
557   return xmalloc (size);
558 }
559 
560 /* Cover function to xcalloc to record bytes allocated.  */
561 
562 static void *
563 gcalloc (size_t nelem, size_t elsize)
564 {
565   bytes_used += nelem * elsize;
566   return xcalloc (nelem, elsize);
567 }
568 
569 /* Cover function to obstack_alloc.  */
570 
571 static void *
572 gcse_alloc (unsigned long size)
573 {
574   bytes_used += size;
575   return obstack_alloc (&gcse_obstack, size);
576 }
577 
578 /* Allocate memory for the reg/memory set tracking tables.
579    This is called at the start of each pass.  */
580 
581 static void
582 alloc_gcse_mem (void)
583 {
584   /* Allocate vars to track sets of regs.  */
585   reg_set_bitmap = ALLOC_REG_SET (NULL);
586 
587   /* Allocate array to keep a list of insns which modify memory in each
588      basic block.  */
589   modify_mem_list = GCNEWVEC (VEC (rtx,heap) *, last_basic_block);
590   canon_modify_mem_list = GCNEWVEC (VEC (modify_pair,heap) *,
591 				    last_basic_block);
592   modify_mem_list_set = BITMAP_ALLOC (NULL);
593   blocks_with_calls = BITMAP_ALLOC (NULL);
594 }
595 
596 /* Free memory allocated by alloc_gcse_mem.  */
597 
598 static void
599 free_gcse_mem (void)
600 {
601   FREE_REG_SET (reg_set_bitmap);
602 
603   free_modify_mem_tables ();
604   BITMAP_FREE (modify_mem_list_set);
605   BITMAP_FREE (blocks_with_calls);
606 }
607 
608 /* Compute the local properties of each recorded expression.
609 
610    Local properties are those that are defined by the block, irrespective of
611    other blocks.
612 
613    An expression is transparent in a block if its operands are not modified
614    in the block.
615 
616    An expression is computed (locally available) in a block if it is computed
617    at least once and expression would contain the same value if the
618    computation was moved to the end of the block.
619 
620    An expression is locally anticipatable in a block if it is computed at
621    least once and expression would contain the same value if the computation
622    was moved to the beginning of the block.
623 
624    We call this routine for pre and code hoisting.  They all compute
625    basically the same information and thus can easily share this code.
626 
627    TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
628    properties.  If NULL, then it is not necessary to compute or record that
629    particular property.
630 
631    TABLE controls which hash table to look at.  */
632 
633 static void
634 compute_local_properties (sbitmap *transp, sbitmap *comp, sbitmap *antloc,
635 			  struct hash_table_d *table)
636 {
637   unsigned int i;
638 
639   /* Initialize any bitmaps that were passed in.  */
640   if (transp)
641     {
642       sbitmap_vector_ones (transp, last_basic_block);
643     }
644 
645   if (comp)
646     sbitmap_vector_zero (comp, last_basic_block);
647   if (antloc)
648     sbitmap_vector_zero (antloc, last_basic_block);
649 
650   for (i = 0; i < table->size; i++)
651     {
652       struct expr *expr;
653 
654       for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
655 	{
656 	  int indx = expr->bitmap_index;
657 	  struct occr *occr;
658 
659 	  /* The expression is transparent in this block if it is not killed.
660 	     We start by assuming all are transparent [none are killed], and
661 	     then reset the bits for those that are.  */
662 	  if (transp)
663 	    compute_transp (expr->expr, indx, transp);
664 
665 	  /* The occurrences recorded in antic_occr are exactly those that
666 	     we want to set to nonzero in ANTLOC.  */
667 	  if (antloc)
668 	    for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
669 	      {
670 		SET_BIT (antloc[BLOCK_FOR_INSN (occr->insn)->index], indx);
671 
672 		/* While we're scanning the table, this is a good place to
673 		   initialize this.  */
674 		occr->deleted_p = 0;
675 	      }
676 
677 	  /* The occurrences recorded in avail_occr are exactly those that
678 	     we want to set to nonzero in COMP.  */
679 	  if (comp)
680 	    for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
681 	      {
682 		SET_BIT (comp[BLOCK_FOR_INSN (occr->insn)->index], indx);
683 
684 		/* While we're scanning the table, this is a good place to
685 		   initialize this.  */
686 		occr->copied_p = 0;
687 	      }
688 
689 	  /* While we're scanning the table, this is a good place to
690 	     initialize this.  */
691 	  expr->reaching_reg = 0;
692 	}
693     }
694 }
695 
696 /* Hash table support.  */
697 
698 struct reg_avail_info
699 {
700   basic_block last_bb;
701   int first_set;
702   int last_set;
703 };
704 
705 static struct reg_avail_info *reg_avail_info;
706 static basic_block current_bb;
707 
708 /* See whether X, the source of a set, is something we want to consider for
709    GCSE.  */
710 
711 static int
712 want_to_gcse_p (rtx x, int *max_distance_ptr)
713 {
714 #ifdef STACK_REGS
715   /* On register stack architectures, don't GCSE constants from the
716      constant pool, as the benefits are often swamped by the overhead
717      of shuffling the register stack between basic blocks.  */
718   if (IS_STACK_MODE (GET_MODE (x)))
719     x = avoid_constant_pool_reference (x);
720 #endif
721 
722   /* GCSE'ing constants:
723 
724      We do not specifically distinguish between constant and non-constant
725      expressions in PRE and Hoist.  We use set_src_cost below to limit
726      the maximum distance simple expressions can travel.
727 
728      Nevertheless, constants are much easier to GCSE, and, hence,
729      it is easy to overdo the optimizations.  Usually, excessive PRE and
730      Hoisting of constant leads to increased register pressure.
731 
732      RA can deal with this by rematerialing some of the constants.
733      Therefore, it is important that the back-end generates sets of constants
734      in a way that allows reload rematerialize them under high register
735      pressure, i.e., a pseudo register with REG_EQUAL to constant
736      is set only once.  Failing to do so will result in IRA/reload
737      spilling such constants under high register pressure instead of
738      rematerializing them.  */
739 
740   switch (GET_CODE (x))
741     {
742     case REG:
743     case SUBREG:
744     case CALL:
745       return 0;
746 
747     case CONST_INT:
748     case CONST_DOUBLE:
749     case CONST_FIXED:
750     case CONST_VECTOR:
751       if (!doing_code_hoisting_p)
752 	/* Do not PRE constants.  */
753 	return 0;
754 
755       /* FALLTHRU */
756 
757     default:
758       if (doing_code_hoisting_p)
759 	/* PRE doesn't implement max_distance restriction.  */
760 	{
761 	  int cost;
762 	  int max_distance;
763 
764 	  gcc_assert (!optimize_function_for_speed_p (cfun)
765 		      && optimize_function_for_size_p (cfun));
766 	  cost = set_src_cost (x, 0);
767 
768 	  if (cost < COSTS_N_INSNS (GCSE_UNRESTRICTED_COST))
769 	    {
770 	      max_distance = (GCSE_COST_DISTANCE_RATIO * cost) / 10;
771 	      if (max_distance == 0)
772 		return 0;
773 
774 	      gcc_assert (max_distance > 0);
775 	    }
776 	  else
777 	    max_distance = 0;
778 
779 	  if (max_distance_ptr)
780 	    *max_distance_ptr = max_distance;
781 	}
782 
783       return can_assign_to_reg_without_clobbers_p (x);
784     }
785 }
786 
787 /* Used internally by can_assign_to_reg_without_clobbers_p.  */
788 
789 static GTY(()) rtx test_insn;
790 
791 /* Return true if we can assign X to a pseudo register such that the
792    resulting insn does not result in clobbering a hard register as a
793    side-effect.
794 
795    Additionally, if the target requires it, check that the resulting insn
796    can be copied.  If it cannot, this means that X is special and probably
797    has hidden side-effects we don't want to mess with.
798 
799    This function is typically used by code motion passes, to verify
800    that it is safe to insert an insn without worrying about clobbering
801    maybe live hard regs.  */
802 
803 bool
804 can_assign_to_reg_without_clobbers_p (rtx x)
805 {
806   int num_clobbers = 0;
807   int icode;
808 
809   /* If this is a valid operand, we are OK.  If it's VOIDmode, we aren't.  */
810   if (general_operand (x, GET_MODE (x)))
811     return 1;
812   else if (GET_MODE (x) == VOIDmode)
813     return 0;
814 
815   /* Otherwise, check if we can make a valid insn from it.  First initialize
816      our test insn if we haven't already.  */
817   if (test_insn == 0)
818     {
819       test_insn
820 	= make_insn_raw (gen_rtx_SET (VOIDmode,
821 				      gen_rtx_REG (word_mode,
822 						   FIRST_PSEUDO_REGISTER * 2),
823 				      const0_rtx));
824       NEXT_INSN (test_insn) = PREV_INSN (test_insn) = 0;
825     }
826 
827   /* Now make an insn like the one we would make when GCSE'ing and see if
828      valid.  */
829   PUT_MODE (SET_DEST (PATTERN (test_insn)), GET_MODE (x));
830   SET_SRC (PATTERN (test_insn)) = x;
831 
832   icode = recog (PATTERN (test_insn), test_insn, &num_clobbers);
833   if (icode < 0)
834     return false;
835 
836   if (num_clobbers > 0 && added_clobbers_hard_reg_p (icode))
837     return false;
838 
839   if (targetm.cannot_copy_insn_p && targetm.cannot_copy_insn_p (test_insn))
840     return false;
841 
842   return true;
843 }
844 
845 /* Return nonzero if the operands of expression X are unchanged from the
846    start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
847    or from INSN to the end of INSN's basic block (if AVAIL_P != 0).  */
848 
849 static int
850 oprs_unchanged_p (const_rtx x, const_rtx insn, int avail_p)
851 {
852   int i, j;
853   enum rtx_code code;
854   const char *fmt;
855 
856   if (x == 0)
857     return 1;
858 
859   code = GET_CODE (x);
860   switch (code)
861     {
862     case REG:
863       {
864 	struct reg_avail_info *info = &reg_avail_info[REGNO (x)];
865 
866 	if (info->last_bb != current_bb)
867 	  return 1;
868 	if (avail_p)
869 	  return info->last_set < DF_INSN_LUID (insn);
870 	else
871 	  return info->first_set >= DF_INSN_LUID (insn);
872       }
873 
874     case MEM:
875       if (load_killed_in_block_p (current_bb, DF_INSN_LUID (insn),
876 				  x, avail_p))
877 	return 0;
878       else
879 	return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);
880 
881     case PRE_DEC:
882     case PRE_INC:
883     case POST_DEC:
884     case POST_INC:
885     case PRE_MODIFY:
886     case POST_MODIFY:
887       return 0;
888 
889     case PC:
890     case CC0: /*FIXME*/
891     case CONST:
892     case CONST_INT:
893     case CONST_DOUBLE:
894     case CONST_FIXED:
895     case CONST_VECTOR:
896     case SYMBOL_REF:
897     case LABEL_REF:
898     case ADDR_VEC:
899     case ADDR_DIFF_VEC:
900       return 1;
901 
902     default:
903       break;
904     }
905 
906   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
907     {
908       if (fmt[i] == 'e')
909 	{
910 	  /* If we are about to do the last recursive call needed at this
911 	     level, change it into iteration.  This function is called enough
912 	     to be worth it.  */
913 	  if (i == 0)
914 	    return oprs_unchanged_p (XEXP (x, i), insn, avail_p);
915 
916 	  else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
917 	    return 0;
918 	}
919       else if (fmt[i] == 'E')
920 	for (j = 0; j < XVECLEN (x, i); j++)
921 	  if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
922 	    return 0;
923     }
924 
925   return 1;
926 }
927 
928 /* Info passed from load_killed_in_block_p to mems_conflict_for_gcse_p.  */
929 
930 struct mem_conflict_info
931 {
932   /* A memory reference for a load instruction, mems_conflict_for_gcse_p will
933      see if a memory store conflicts with this memory load.  */
934   const_rtx mem;
935 
936   /* True if mems_conflict_for_gcse_p finds a conflict between two memory
937      references.  */
938   bool conflict;
939 };
940 
941 /* DEST is the output of an instruction.  If it is a memory reference and
942    possibly conflicts with the load found in DATA, then communicate this
943    information back through DATA.  */
944 
945 static void
946 mems_conflict_for_gcse_p (rtx dest, const_rtx setter ATTRIBUTE_UNUSED,
947 			  void *data)
948 {
949   struct mem_conflict_info *mci = (struct mem_conflict_info *) data;
950 
951   while (GET_CODE (dest) == SUBREG
952 	 || GET_CODE (dest) == ZERO_EXTRACT
953 	 || GET_CODE (dest) == STRICT_LOW_PART)
954     dest = XEXP (dest, 0);
955 
956   /* If DEST is not a MEM, then it will not conflict with the load.  Note
957      that function calls are assumed to clobber memory, but are handled
958      elsewhere.  */
959   if (! MEM_P (dest))
960     return;
961 
962   /* If we are setting a MEM in our list of specially recognized MEMs,
963      don't mark as killed this time.  */
964   if (pre_ldst_mems != NULL && expr_equiv_p (dest, mci->mem))
965     {
966       if (!find_rtx_in_ldst (dest))
967 	mci->conflict = true;
968       return;
969     }
970 
971   if (true_dependence (dest, GET_MODE (dest), mci->mem))
972     mci->conflict = true;
973 }
974 
975 /* Return nonzero if the expression in X (a memory reference) is killed
976    in block BB before or after the insn with the LUID in UID_LIMIT.
977    AVAIL_P is nonzero for kills after UID_LIMIT, and zero for kills
978    before UID_LIMIT.
979 
980    To check the entire block, set UID_LIMIT to max_uid + 1 and
981    AVAIL_P to 0.  */
982 
983 static int
984 load_killed_in_block_p (const_basic_block bb, int uid_limit, const_rtx x,
985 			int avail_p)
986 {
987   VEC (rtx,heap) *list = modify_mem_list[bb->index];
988   rtx setter;
989   unsigned ix;
990 
991   /* If this is a readonly then we aren't going to be changing it.  */
992   if (MEM_READONLY_P (x))
993     return 0;
994 
995   FOR_EACH_VEC_ELT_REVERSE (rtx, list, ix, setter)
996     {
997       struct mem_conflict_info mci;
998 
999       /* Ignore entries in the list that do not apply.  */
1000       if ((avail_p
1001 	   && DF_INSN_LUID (setter) < uid_limit)
1002 	  || (! avail_p
1003 	      && DF_INSN_LUID (setter) > uid_limit))
1004 	continue;
1005 
1006       /* If SETTER is a call everything is clobbered.  Note that calls
1007 	 to pure functions are never put on the list, so we need not
1008 	 worry about them.  */
1009       if (CALL_P (setter))
1010 	return 1;
1011 
1012       /* SETTER must be an INSN of some kind that sets memory.  Call
1013 	 note_stores to examine each hunk of memory that is modified.  */
1014       mci.mem = x;
1015       mci.conflict = false;
1016       note_stores (PATTERN (setter), mems_conflict_for_gcse_p, &mci);
1017       if (mci.conflict)
1018 	return 1;
1019     }
1020   return 0;
1021 }
1022 
1023 /* Return nonzero if the operands of expression X are unchanged from
1024    the start of INSN's basic block up to but not including INSN.  */
1025 
1026 static int
1027 oprs_anticipatable_p (const_rtx x, const_rtx insn)
1028 {
1029   return oprs_unchanged_p (x, insn, 0);
1030 }
1031 
1032 /* Return nonzero if the operands of expression X are unchanged from
1033    INSN to the end of INSN's basic block.  */
1034 
1035 static int
1036 oprs_available_p (const_rtx x, const_rtx insn)
1037 {
1038   return oprs_unchanged_p (x, insn, 1);
1039 }
1040 
1041 /* Hash expression X.
1042 
1043    MODE is only used if X is a CONST_INT.  DO_NOT_RECORD_P is a boolean
1044    indicating if a volatile operand is found or if the expression contains
1045    something we don't want to insert in the table.  HASH_TABLE_SIZE is
1046    the current size of the hash table to be probed.  */
1047 
1048 static unsigned int
1049 hash_expr (const_rtx x, enum machine_mode mode, int *do_not_record_p,
1050 	   int hash_table_size)
1051 {
1052   unsigned int hash;
1053 
1054   *do_not_record_p = 0;
1055 
1056   hash = hash_rtx (x, mode, do_not_record_p, NULL, /*have_reg_qty=*/false);
1057   return hash % hash_table_size;
1058 }
1059 
1060 /* Return nonzero if exp1 is equivalent to exp2.  */
1061 
1062 static int
1063 expr_equiv_p (const_rtx x, const_rtx y)
1064 {
1065   return exp_equiv_p (x, y, 0, true);
1066 }
1067 
1068 /* Insert expression X in INSN in the hash TABLE.
1069    If it is already present, record it as the last occurrence in INSN's
1070    basic block.
1071 
1072    MODE is the mode of the value X is being stored into.
1073    It is only used if X is a CONST_INT.
1074 
1075    ANTIC_P is nonzero if X is an anticipatable expression.
1076    AVAIL_P is nonzero if X is an available expression.
1077 
1078    MAX_DISTANCE is the maximum distance in instructions this expression can
1079    be moved.  */
1080 
1081 static void
1082 insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p,
1083 		      int avail_p, int max_distance, struct hash_table_d *table)
1084 {
1085   int found, do_not_record_p;
1086   unsigned int hash;
1087   struct expr *cur_expr, *last_expr = NULL;
1088   struct occr *antic_occr, *avail_occr;
1089 
1090   hash = hash_expr (x, mode, &do_not_record_p, table->size);
1091 
1092   /* Do not insert expression in table if it contains volatile operands,
1093      or if hash_expr determines the expression is something we don't want
1094      to or can't handle.  */
1095   if (do_not_record_p)
1096     return;
1097 
1098   cur_expr = table->table[hash];
1099   found = 0;
1100 
1101   while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1102     {
1103       /* If the expression isn't found, save a pointer to the end of
1104 	 the list.  */
1105       last_expr = cur_expr;
1106       cur_expr = cur_expr->next_same_hash;
1107     }
1108 
1109   if (! found)
1110     {
1111       cur_expr = GOBNEW (struct expr);
1112       bytes_used += sizeof (struct expr);
1113       if (table->table[hash] == NULL)
1114 	/* This is the first pattern that hashed to this index.  */
1115 	table->table[hash] = cur_expr;
1116       else
1117 	/* Add EXPR to end of this hash chain.  */
1118 	last_expr->next_same_hash = cur_expr;
1119 
1120       /* Set the fields of the expr element.  */
1121       cur_expr->expr = x;
1122       cur_expr->bitmap_index = table->n_elems++;
1123       cur_expr->next_same_hash = NULL;
1124       cur_expr->antic_occr = NULL;
1125       cur_expr->avail_occr = NULL;
1126       gcc_assert (max_distance >= 0);
1127       cur_expr->max_distance = max_distance;
1128     }
1129   else
1130     gcc_assert (cur_expr->max_distance == max_distance);
1131 
1132   /* Now record the occurrence(s).  */
1133   if (antic_p)
1134     {
1135       antic_occr = cur_expr->antic_occr;
1136 
1137       if (antic_occr
1138 	  && BLOCK_FOR_INSN (antic_occr->insn) != BLOCK_FOR_INSN (insn))
1139 	antic_occr = NULL;
1140 
1141       if (antic_occr)
1142 	/* Found another instance of the expression in the same basic block.
1143 	   Prefer the currently recorded one.  We want the first one in the
1144 	   block and the block is scanned from start to end.  */
1145 	; /* nothing to do */
1146       else
1147 	{
1148 	  /* First occurrence of this expression in this basic block.  */
1149 	  antic_occr = GOBNEW (struct occr);
1150 	  bytes_used += sizeof (struct occr);
1151 	  antic_occr->insn = insn;
1152 	  antic_occr->next = cur_expr->antic_occr;
1153 	  antic_occr->deleted_p = 0;
1154 	  cur_expr->antic_occr = antic_occr;
1155 	}
1156     }
1157 
1158   if (avail_p)
1159     {
1160       avail_occr = cur_expr->avail_occr;
1161 
1162       if (avail_occr
1163 	  && BLOCK_FOR_INSN (avail_occr->insn) == BLOCK_FOR_INSN (insn))
1164 	{
1165 	  /* Found another instance of the expression in the same basic block.
1166 	     Prefer this occurrence to the currently recorded one.  We want
1167 	     the last one in the block and the block is scanned from start
1168 	     to end.  */
1169 	  avail_occr->insn = insn;
1170 	}
1171       else
1172 	{
1173 	  /* First occurrence of this expression in this basic block.  */
1174 	  avail_occr = GOBNEW (struct occr);
1175 	  bytes_used += sizeof (struct occr);
1176 	  avail_occr->insn = insn;
1177 	  avail_occr->next = cur_expr->avail_occr;
1178 	  avail_occr->deleted_p = 0;
1179 	  cur_expr->avail_occr = avail_occr;
1180 	}
1181     }
1182 }
1183 
1184 /* Scan SET present in INSN and add an entry to the hash TABLE.  */
1185 
1186 static void
1187 hash_scan_set (rtx set, rtx insn, struct hash_table_d *table)
1188 {
1189   rtx src = SET_SRC (set);
1190   rtx dest = SET_DEST (set);
1191   rtx note;
1192 
1193   if (GET_CODE (src) == CALL)
1194     hash_scan_call (src, insn, table);
1195 
1196   else if (REG_P (dest))
1197     {
1198       unsigned int regno = REGNO (dest);
1199       int max_distance = 0;
1200 
1201       /* See if a REG_EQUAL note shows this equivalent to a simpler expression.
1202 
1203 	 This allows us to do a single GCSE pass and still eliminate
1204 	 redundant constants, addresses or other expressions that are
1205 	 constructed with multiple instructions.
1206 
1207 	 However, keep the original SRC if INSN is a simple reg-reg move.
1208 	 In this case, there will almost always be a REG_EQUAL note on the
1209 	 insn that sets SRC.  By recording the REG_EQUAL value here as SRC
1210 	 for INSN, we miss copy propagation opportunities and we perform the
1211 	 same PRE GCSE operation repeatedly on the same REG_EQUAL value if we
1212 	 do more than one PRE GCSE pass.
1213 
1214 	 Note that this does not impede profitable constant propagations.  We
1215 	 "look through" reg-reg sets in lookup_avail_set.  */
1216       note = find_reg_equal_equiv_note (insn);
1217       if (note != 0
1218 	  && REG_NOTE_KIND (note) == REG_EQUAL
1219 	  && !REG_P (src)
1220 	  && want_to_gcse_p (XEXP (note, 0), NULL))
1221 	src = XEXP (note, 0), set = gen_rtx_SET (VOIDmode, dest, src);
1222 
1223       /* Only record sets of pseudo-regs in the hash table.  */
1224       if (regno >= FIRST_PSEUDO_REGISTER
1225 	  /* Don't GCSE something if we can't do a reg/reg copy.  */
1226 	  && can_copy_p (GET_MODE (dest))
1227 	  /* GCSE commonly inserts instruction after the insn.  We can't
1228 	     do that easily for EH edges so disable GCSE on these for now.  */
1229 	  /* ??? We can now easily create new EH landing pads at the
1230 	     gimple level, for splitting edges; there's no reason we
1231 	     can't do the same thing at the rtl level.  */
1232 	  && !can_throw_internal (insn)
1233 	  /* Is SET_SRC something we want to gcse?  */
1234 	  && want_to_gcse_p (src, &max_distance)
1235 	  /* Don't CSE a nop.  */
1236 	  && ! set_noop_p (set)
1237 	  /* Don't GCSE if it has attached REG_EQUIV note.
1238 	     At this point this only function parameters should have
1239 	     REG_EQUIV notes and if the argument slot is used somewhere
1240 	     explicitly, it means address of parameter has been taken,
1241 	     so we should not extend the lifetime of the pseudo.  */
1242 	  && (note == NULL_RTX || ! MEM_P (XEXP (note, 0))))
1243 	{
1244 	  /* An expression is not anticipatable if its operands are
1245 	     modified before this insn or if this is not the only SET in
1246 	     this insn.  The latter condition does not have to mean that
1247 	     SRC itself is not anticipatable, but we just will not be
1248 	     able to handle code motion of insns with multiple sets.  */
1249 	  int antic_p = oprs_anticipatable_p (src, insn)
1250 			&& !multiple_sets (insn);
1251 	  /* An expression is not available if its operands are
1252 	     subsequently modified, including this insn.  It's also not
1253 	     available if this is a branch, because we can't insert
1254 	     a set after the branch.  */
1255 	  int avail_p = (oprs_available_p (src, insn)
1256 			 && ! JUMP_P (insn));
1257 
1258 	  insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p,
1259 				max_distance, table);
1260 	}
1261     }
1262   /* In case of store we want to consider the memory value as available in
1263      the REG stored in that memory. This makes it possible to remove
1264      redundant loads from due to stores to the same location.  */
1265   else if (flag_gcse_las && REG_P (src) && MEM_P (dest))
1266       {
1267         unsigned int regno = REGNO (src);
1268 	int max_distance = 0;
1269 
1270 	/* Only record sets of pseudo-regs in the hash table.  */
1271         if (regno >= FIRST_PSEUDO_REGISTER
1272 	   /* Don't GCSE something if we can't do a reg/reg copy.  */
1273 	   && can_copy_p (GET_MODE (src))
1274 	   /* GCSE commonly inserts instruction after the insn.  We can't
1275 	      do that easily for EH edges so disable GCSE on these for now.  */
1276 	   && !can_throw_internal (insn)
1277 	   /* Is SET_DEST something we want to gcse?  */
1278 	   && want_to_gcse_p (dest, &max_distance)
1279 	   /* Don't CSE a nop.  */
1280 	   && ! set_noop_p (set)
1281 	   /* Don't GCSE if it has attached REG_EQUIV note.
1282 	      At this point this only function parameters should have
1283 	      REG_EQUIV notes and if the argument slot is used somewhere
1284 	      explicitly, it means address of parameter has been taken,
1285 	      so we should not extend the lifetime of the pseudo.  */
1286 	   && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0
1287 	       || ! MEM_P (XEXP (note, 0))))
1288              {
1289                /* Stores are never anticipatable.  */
1290                int antic_p = 0;
1291 	       /* An expression is not available if its operands are
1292 	          subsequently modified, including this insn.  It's also not
1293 	          available if this is a branch, because we can't insert
1294 	          a set after the branch.  */
1295                int avail_p = oprs_available_p (dest, insn)
1296 			     && ! JUMP_P (insn);
1297 
1298 	       /* Record the memory expression (DEST) in the hash table.  */
1299 	       insert_expr_in_table (dest, GET_MODE (dest), insn,
1300 				     antic_p, avail_p, max_distance, table);
1301              }
1302       }
1303 }
1304 
1305 static void
1306 hash_scan_clobber (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
1307 		   struct hash_table_d *table ATTRIBUTE_UNUSED)
1308 {
1309   /* Currently nothing to do.  */
1310 }
1311 
1312 static void
1313 hash_scan_call (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
1314 		struct hash_table_d *table ATTRIBUTE_UNUSED)
1315 {
1316   /* Currently nothing to do.  */
1317 }
1318 
1319 /* Process INSN and add hash table entries as appropriate.  */
1320 
1321 static void
1322 hash_scan_insn (rtx insn, struct hash_table_d *table)
1323 {
1324   rtx pat = PATTERN (insn);
1325   int i;
1326 
1327   /* Pick out the sets of INSN and for other forms of instructions record
1328      what's been modified.  */
1329 
1330   if (GET_CODE (pat) == SET)
1331     hash_scan_set (pat, insn, table);
1332 
1333   else if (GET_CODE (pat) == CLOBBER)
1334     hash_scan_clobber (pat, insn, table);
1335 
1336   else if (GET_CODE (pat) == CALL)
1337     hash_scan_call (pat, insn, table);
1338 
1339   else if (GET_CODE (pat) == PARALLEL)
1340     for (i = 0; i < XVECLEN (pat, 0); i++)
1341       {
1342 	rtx x = XVECEXP (pat, 0, i);
1343 
1344 	if (GET_CODE (x) == SET)
1345 	  hash_scan_set (x, insn, table);
1346 	else if (GET_CODE (x) == CLOBBER)
1347 	  hash_scan_clobber (x, insn, table);
1348 	else if (GET_CODE (x) == CALL)
1349 	  hash_scan_call (x, insn, table);
1350       }
1351 }
1352 
1353 /* Dump the hash table TABLE to file FILE under the name NAME.  */
1354 
1355 static void
1356 dump_hash_table (FILE *file, const char *name, struct hash_table_d *table)
1357 {
1358   int i;
1359   /* Flattened out table, so it's printed in proper order.  */
1360   struct expr **flat_table;
1361   unsigned int *hash_val;
1362   struct expr *expr;
1363 
1364   flat_table = XCNEWVEC (struct expr *, table->n_elems);
1365   hash_val = XNEWVEC (unsigned int, table->n_elems);
1366 
1367   for (i = 0; i < (int) table->size; i++)
1368     for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
1369       {
1370 	flat_table[expr->bitmap_index] = expr;
1371 	hash_val[expr->bitmap_index] = i;
1372       }
1373 
1374   fprintf (file, "%s hash table (%d buckets, %d entries)\n",
1375 	   name, table->size, table->n_elems);
1376 
1377   for (i = 0; i < (int) table->n_elems; i++)
1378     if (flat_table[i] != 0)
1379       {
1380 	expr = flat_table[i];
1381 	fprintf (file, "Index %d (hash value %d; max distance %d)\n  ",
1382 		 expr->bitmap_index, hash_val[i], expr->max_distance);
1383 	print_rtl (file, expr->expr);
1384 	fprintf (file, "\n");
1385       }
1386 
1387   fprintf (file, "\n");
1388 
1389   free (flat_table);
1390   free (hash_val);
1391 }
1392 
1393 /* Record register first/last/block set information for REGNO in INSN.
1394 
1395    first_set records the first place in the block where the register
1396    is set and is used to compute "anticipatability".
1397 
1398    last_set records the last place in the block where the register
1399    is set and is used to compute "availability".
1400 
1401    last_bb records the block for which first_set and last_set are
1402    valid, as a quick test to invalidate them.  */
1403 
1404 static void
1405 record_last_reg_set_info (rtx insn, int regno)
1406 {
1407   struct reg_avail_info *info = &reg_avail_info[regno];
1408   int luid = DF_INSN_LUID (insn);
1409 
1410   info->last_set = luid;
1411   if (info->last_bb != current_bb)
1412     {
1413       info->last_bb = current_bb;
1414       info->first_set = luid;
1415     }
1416 }
1417 
1418 /* Record all of the canonicalized MEMs of record_last_mem_set_info's insn.
1419    Note we store a pair of elements in the list, so they have to be
1420    taken off pairwise.  */
1421 
1422 static void
1423 canon_list_insert (rtx dest ATTRIBUTE_UNUSED, const_rtx x ATTRIBUTE_UNUSED,
1424 		   void * v_insn)
1425 {
1426   rtx dest_addr, insn;
1427   int bb;
1428   modify_pair *pair;
1429 
1430   while (GET_CODE (dest) == SUBREG
1431       || GET_CODE (dest) == ZERO_EXTRACT
1432       || GET_CODE (dest) == STRICT_LOW_PART)
1433     dest = XEXP (dest, 0);
1434 
1435   /* If DEST is not a MEM, then it will not conflict with a load.  Note
1436      that function calls are assumed to clobber memory, but are handled
1437      elsewhere.  */
1438 
1439   if (! MEM_P (dest))
1440     return;
1441 
1442   dest_addr = get_addr (XEXP (dest, 0));
1443   dest_addr = canon_rtx (dest_addr);
1444   insn = (rtx) v_insn;
1445   bb = BLOCK_FOR_INSN (insn)->index;
1446 
1447   pair = VEC_safe_push (modify_pair, heap, canon_modify_mem_list[bb], NULL);
1448   pair->dest = dest;
1449   pair->dest_addr = dest_addr;
1450 }
1451 
1452 /* Record memory modification information for INSN.  We do not actually care
1453    about the memory location(s) that are set, or even how they are set (consider
1454    a CALL_INSN).  We merely need to record which insns modify memory.  */
1455 
1456 static void
1457 record_last_mem_set_info (rtx insn)
1458 {
1459   int bb = BLOCK_FOR_INSN (insn)->index;
1460 
1461   /* load_killed_in_block_p will handle the case of calls clobbering
1462      everything.  */
1463   VEC_safe_push (rtx, heap, modify_mem_list[bb], insn);
1464   bitmap_set_bit (modify_mem_list_set, bb);
1465 
1466   if (CALL_P (insn))
1467     bitmap_set_bit (blocks_with_calls, bb);
1468   else
1469     note_stores (PATTERN (insn), canon_list_insert, (void*) insn);
1470 }
1471 
1472 /* Called from compute_hash_table via note_stores to handle one
1473    SET or CLOBBER in an insn.  DATA is really the instruction in which
1474    the SET is taking place.  */
1475 
1476 static void
1477 record_last_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data)
1478 {
1479   rtx last_set_insn = (rtx) data;
1480 
1481   if (GET_CODE (dest) == SUBREG)
1482     dest = SUBREG_REG (dest);
1483 
1484   if (REG_P (dest))
1485     record_last_reg_set_info (last_set_insn, REGNO (dest));
1486   else if (MEM_P (dest)
1487 	   /* Ignore pushes, they clobber nothing.  */
1488 	   && ! push_operand (dest, GET_MODE (dest)))
1489     record_last_mem_set_info (last_set_insn);
1490 }
1491 
1492 /* Top level function to create an expression hash table.
1493 
1494    Expression entries are placed in the hash table if
1495    - they are of the form (set (pseudo-reg) src),
1496    - src is something we want to perform GCSE on,
1497    - none of the operands are subsequently modified in the block
1498 
1499    Currently src must be a pseudo-reg or a const_int.
1500 
1501    TABLE is the table computed.  */
1502 
1503 static void
1504 compute_hash_table_work (struct hash_table_d *table)
1505 {
1506   int i;
1507 
1508   /* re-Cache any INSN_LIST nodes we have allocated.  */
1509   clear_modify_mem_tables ();
1510   /* Some working arrays used to track first and last set in each block.  */
1511   reg_avail_info = GNEWVEC (struct reg_avail_info, max_reg_num ());
1512 
1513   for (i = 0; i < max_reg_num (); ++i)
1514     reg_avail_info[i].last_bb = NULL;
1515 
1516   FOR_EACH_BB (current_bb)
1517     {
1518       rtx insn;
1519       unsigned int regno;
1520 
1521       /* First pass over the instructions records information used to
1522 	 determine when registers and memory are first and last set.  */
1523       FOR_BB_INSNS (current_bb, insn)
1524 	{
1525 	  if (!NONDEBUG_INSN_P (insn))
1526 	    continue;
1527 
1528 	  if (CALL_P (insn))
1529 	    {
1530 	      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1531 		if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno))
1532 		  record_last_reg_set_info (insn, regno);
1533 
1534 	      if (! RTL_CONST_OR_PURE_CALL_P (insn))
1535 		record_last_mem_set_info (insn);
1536 	    }
1537 
1538 	  note_stores (PATTERN (insn), record_last_set_info, insn);
1539 	}
1540 
1541       /* The next pass builds the hash table.  */
1542       FOR_BB_INSNS (current_bb, insn)
1543 	if (NONDEBUG_INSN_P (insn))
1544 	  hash_scan_insn (insn, table);
1545     }
1546 
1547   free (reg_avail_info);
1548   reg_avail_info = NULL;
1549 }
1550 
1551 /* Allocate space for the set/expr hash TABLE.
1552    It is used to determine the number of buckets to use.  */
1553 
1554 static void
1555 alloc_hash_table (struct hash_table_d *table)
1556 {
1557   int n;
1558 
1559   n = get_max_insn_count ();
1560 
1561   table->size = n / 4;
1562   if (table->size < 11)
1563     table->size = 11;
1564 
1565   /* Attempt to maintain efficient use of hash table.
1566      Making it an odd number is simplest for now.
1567      ??? Later take some measurements.  */
1568   table->size |= 1;
1569   n = table->size * sizeof (struct expr *);
1570   table->table = GNEWVAR (struct expr *, n);
1571 }
1572 
1573 /* Free things allocated by alloc_hash_table.  */
1574 
1575 static void
1576 free_hash_table (struct hash_table_d *table)
1577 {
1578   free (table->table);
1579 }
1580 
1581 /* Compute the expression hash table TABLE.  */
1582 
1583 static void
1584 compute_hash_table (struct hash_table_d *table)
1585 {
1586   /* Initialize count of number of entries in hash table.  */
1587   table->n_elems = 0;
1588   memset (table->table, 0, table->size * sizeof (struct expr *));
1589 
1590   compute_hash_table_work (table);
1591 }
1592 
1593 /* Expression tracking support.  */
1594 
1595 /* Clear canon_modify_mem_list and modify_mem_list tables.  */
1596 static void
1597 clear_modify_mem_tables (void)
1598 {
1599   unsigned i;
1600   bitmap_iterator bi;
1601 
1602   EXECUTE_IF_SET_IN_BITMAP (modify_mem_list_set, 0, i, bi)
1603     {
1604       VEC_free (rtx, heap, modify_mem_list[i]);
1605       VEC_free (modify_pair, heap, canon_modify_mem_list[i]);
1606     }
1607   bitmap_clear (modify_mem_list_set);
1608   bitmap_clear (blocks_with_calls);
1609 }
1610 
1611 /* Release memory used by modify_mem_list_set.  */
1612 
1613 static void
1614 free_modify_mem_tables (void)
1615 {
1616   clear_modify_mem_tables ();
1617   free (modify_mem_list);
1618   free (canon_modify_mem_list);
1619   modify_mem_list = 0;
1620   canon_modify_mem_list = 0;
1621 }
1622 
1623 /* For each block, compute whether X is transparent.  X is either an
1624    expression or an assignment [though we don't care which, for this context
1625    an assignment is treated as an expression].  For each block where an
1626    element of X is modified, reset the INDX bit in BMAP.  */
1627 
1628 static void
1629 compute_transp (const_rtx x, int indx, sbitmap *bmap)
1630 {
1631   int i, j;
1632   enum rtx_code code;
1633   const char *fmt;
1634 
1635   /* repeat is used to turn tail-recursion into iteration since GCC
1636      can't do it when there's no return value.  */
1637  repeat:
1638 
1639   if (x == 0)
1640     return;
1641 
1642   code = GET_CODE (x);
1643   switch (code)
1644     {
1645     case REG:
1646 	{
1647 	  df_ref def;
1648 	  for (def = DF_REG_DEF_CHAIN (REGNO (x));
1649 	       def;
1650 	       def = DF_REF_NEXT_REG (def))
1651 	    RESET_BIT (bmap[DF_REF_BB (def)->index], indx);
1652 	}
1653 
1654       return;
1655 
1656     case MEM:
1657       if (! MEM_READONLY_P (x))
1658 	{
1659 	  bitmap_iterator bi;
1660 	  unsigned bb_index;
1661 	  rtx x_addr;
1662 
1663 	  x_addr = get_addr (XEXP (x, 0));
1664 	  x_addr = canon_rtx (x_addr);
1665 
1666 	  /* First handle all the blocks with calls.  We don't need to
1667 	     do any list walking for them.  */
1668 	  EXECUTE_IF_SET_IN_BITMAP (blocks_with_calls, 0, bb_index, bi)
1669 	    {
1670 	      RESET_BIT (bmap[bb_index], indx);
1671 	    }
1672 
1673 	  /* Now iterate over the blocks which have memory modifications
1674 	     but which do not have any calls.  */
1675 	  EXECUTE_IF_AND_COMPL_IN_BITMAP (modify_mem_list_set,
1676 					  blocks_with_calls,
1677 					  0, bb_index, bi)
1678 	    {
1679 	      VEC (modify_pair,heap) *list
1680 		= canon_modify_mem_list[bb_index];
1681 	      modify_pair *pair;
1682 	      unsigned ix;
1683 
1684 	      FOR_EACH_VEC_ELT_REVERSE (modify_pair, list, ix, pair)
1685 		{
1686 		  rtx dest = pair->dest;
1687 		  rtx dest_addr = pair->dest_addr;
1688 
1689 		  if (canon_true_dependence (dest, GET_MODE (dest),
1690 					     dest_addr, x, x_addr))
1691 		    RESET_BIT (bmap[bb_index], indx);
1692 	        }
1693 	    }
1694 	}
1695 
1696       x = XEXP (x, 0);
1697       goto repeat;
1698 
1699     case PC:
1700     case CC0: /*FIXME*/
1701     case CONST:
1702     case CONST_INT:
1703     case CONST_DOUBLE:
1704     case CONST_FIXED:
1705     case CONST_VECTOR:
1706     case SYMBOL_REF:
1707     case LABEL_REF:
1708     case ADDR_VEC:
1709     case ADDR_DIFF_VEC:
1710       return;
1711 
1712     default:
1713       break;
1714     }
1715 
1716   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1717     {
1718       if (fmt[i] == 'e')
1719 	{
1720 	  /* If we are about to do the last recursive call
1721 	     needed at this level, change it into iteration.
1722 	     This function is called enough to be worth it.  */
1723 	  if (i == 0)
1724 	    {
1725 	      x = XEXP (x, i);
1726 	      goto repeat;
1727 	    }
1728 
1729 	  compute_transp (XEXP (x, i), indx, bmap);
1730 	}
1731       else if (fmt[i] == 'E')
1732 	for (j = 0; j < XVECLEN (x, i); j++)
1733 	  compute_transp (XVECEXP (x, i, j), indx, bmap);
1734     }
1735 }
1736 
1737 /* Compute PRE+LCM working variables.  */
1738 
1739 /* Local properties of expressions.  */
1740 
1741 /* Nonzero for expressions that are transparent in the block.  */
1742 static sbitmap *transp;
1743 
1744 /* Nonzero for expressions that are computed (available) in the block.  */
1745 static sbitmap *comp;
1746 
1747 /* Nonzero for expressions that are locally anticipatable in the block.  */
1748 static sbitmap *antloc;
1749 
1750 /* Nonzero for expressions where this block is an optimal computation
1751    point.  */
1752 static sbitmap *pre_optimal;
1753 
1754 /* Nonzero for expressions which are redundant in a particular block.  */
1755 static sbitmap *pre_redundant;
1756 
1757 /* Nonzero for expressions which should be inserted on a specific edge.  */
1758 static sbitmap *pre_insert_map;
1759 
1760 /* Nonzero for expressions which should be deleted in a specific block.  */
1761 static sbitmap *pre_delete_map;
1762 
1763 /* Allocate vars used for PRE analysis.  */
1764 
1765 static void
1766 alloc_pre_mem (int n_blocks, int n_exprs)
1767 {
1768   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
1769   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
1770   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
1771 
1772   pre_optimal = NULL;
1773   pre_redundant = NULL;
1774   pre_insert_map = NULL;
1775   pre_delete_map = NULL;
1776   ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
1777 
1778   /* pre_insert and pre_delete are allocated later.  */
1779 }
1780 
1781 /* Free vars used for PRE analysis.  */
1782 
1783 static void
1784 free_pre_mem (void)
1785 {
1786   sbitmap_vector_free (transp);
1787   sbitmap_vector_free (comp);
1788 
1789   /* ANTLOC and AE_KILL are freed just after pre_lcm finishes.  */
1790 
1791   if (pre_optimal)
1792     sbitmap_vector_free (pre_optimal);
1793   if (pre_redundant)
1794     sbitmap_vector_free (pre_redundant);
1795   if (pre_insert_map)
1796     sbitmap_vector_free (pre_insert_map);
1797   if (pre_delete_map)
1798     sbitmap_vector_free (pre_delete_map);
1799 
1800   transp = comp = NULL;
1801   pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
1802 }
1803 
1804 /* Remove certain expressions from anticipatable and transparent
1805    sets of basic blocks that have incoming abnormal edge.
1806    For PRE remove potentially trapping expressions to avoid placing
1807    them on abnormal edges.  For hoisting remove memory references that
1808    can be clobbered by calls.  */
1809 
1810 static void
1811 prune_expressions (bool pre_p)
1812 {
1813   sbitmap prune_exprs;
1814   struct expr *expr;
1815   unsigned int ui;
1816   basic_block bb;
1817 
1818   prune_exprs = sbitmap_alloc (expr_hash_table.n_elems);
1819   sbitmap_zero (prune_exprs);
1820   for (ui = 0; ui < expr_hash_table.size; ui++)
1821     {
1822       for (expr = expr_hash_table.table[ui]; expr; expr = expr->next_same_hash)
1823 	{
1824 	  /* Note potentially trapping expressions.  */
1825 	  if (may_trap_p (expr->expr))
1826 	    {
1827 	      SET_BIT (prune_exprs, expr->bitmap_index);
1828 	      continue;
1829 	    }
1830 
1831 	  if (!pre_p && MEM_P (expr->expr))
1832 	    /* Note memory references that can be clobbered by a call.
1833 	       We do not split abnormal edges in hoisting, so would
1834 	       a memory reference get hoisted along an abnormal edge,
1835 	       it would be placed /before/ the call.  Therefore, only
1836 	       constant memory references can be hoisted along abnormal
1837 	       edges.  */
1838 	    {
1839 	      if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
1840 		  && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
1841 		continue;
1842 
1843 	      if (MEM_READONLY_P (expr->expr)
1844 		  && !MEM_VOLATILE_P (expr->expr)
1845 		  && MEM_NOTRAP_P (expr->expr))
1846 		/* Constant memory reference, e.g., a PIC address.  */
1847 		continue;
1848 
1849 	      /* ??? Optimally, we would use interprocedural alias
1850 		 analysis to determine if this mem is actually killed
1851 		 by this call.  */
1852 
1853 	      SET_BIT (prune_exprs, expr->bitmap_index);
1854 	    }
1855 	}
1856     }
1857 
1858   FOR_EACH_BB (bb)
1859     {
1860       edge e;
1861       edge_iterator ei;
1862 
1863       /* If the current block is the destination of an abnormal edge, we
1864 	 kill all trapping (for PRE) and memory (for hoist) expressions
1865 	 because we won't be able to properly place the instruction on
1866 	 the edge.  So make them neither anticipatable nor transparent.
1867 	 This is fairly conservative.
1868 
1869 	 ??? For hoisting it may be necessary to check for set-and-jump
1870 	 instructions here, not just for abnormal edges.  The general problem
1871 	 is that when an expression cannot not be placed right at the end of
1872 	 a basic block we should account for any side-effects of a subsequent
1873 	 jump instructions that could clobber the expression.  It would
1874 	 be best to implement this check along the lines of
1875 	 hoist_expr_reaches_here_p where the target block is already known
1876 	 and, hence, there's no need to conservatively prune expressions on
1877 	 "intermediate" set-and-jump instructions.  */
1878       FOR_EACH_EDGE (e, ei, bb->preds)
1879 	if ((e->flags & EDGE_ABNORMAL)
1880 	    && (pre_p || CALL_P (BB_END (e->src))))
1881 	  {
1882 	    sbitmap_difference (antloc[bb->index],
1883 				antloc[bb->index], prune_exprs);
1884 	    sbitmap_difference (transp[bb->index],
1885 				transp[bb->index], prune_exprs);
1886 	    break;
1887 	  }
1888     }
1889 
1890   sbitmap_free (prune_exprs);
1891 }
1892 
1893 /* It may be necessary to insert a large number of insns on edges to
1894    make the existing occurrences of expressions fully redundant.  This
1895    routine examines the set of insertions and deletions and if the ratio
1896    of insertions to deletions is too high for a particular expression, then
1897    the expression is removed from the insertion/deletion sets.
1898 
1899    N_ELEMS is the number of elements in the hash table.  */
1900 
1901 static void
1902 prune_insertions_deletions (int n_elems)
1903 {
1904   sbitmap_iterator sbi;
1905   sbitmap prune_exprs;
1906 
1907   /* We always use I to iterate over blocks/edges and J to iterate over
1908      expressions.  */
1909   unsigned int i, j;
1910 
1911   /* Counts for the number of times an expression needs to be inserted and
1912      number of times an expression can be removed as a result.  */
1913   int *insertions = GCNEWVEC (int, n_elems);
1914   int *deletions = GCNEWVEC (int, n_elems);
1915 
1916   /* Set of expressions which require too many insertions relative to
1917      the number of deletions achieved.  We will prune these out of the
1918      insertion/deletion sets.  */
1919   prune_exprs = sbitmap_alloc (n_elems);
1920   sbitmap_zero (prune_exprs);
1921 
1922   /* Iterate over the edges counting the number of times each expression
1923      needs to be inserted.  */
1924   for (i = 0; i < (unsigned) n_edges; i++)
1925     {
1926       EXECUTE_IF_SET_IN_SBITMAP (pre_insert_map[i], 0, j, sbi)
1927 	insertions[j]++;
1928     }
1929 
1930   /* Similarly for deletions, but those occur in blocks rather than on
1931      edges.  */
1932   for (i = 0; i < (unsigned) last_basic_block; i++)
1933     {
1934       EXECUTE_IF_SET_IN_SBITMAP (pre_delete_map[i], 0, j, sbi)
1935 	deletions[j]++;
1936     }
1937 
1938   /* Now that we have accurate counts, iterate over the elements in the
1939      hash table and see if any need too many insertions relative to the
1940      number of evaluations that can be removed.  If so, mark them in
1941      PRUNE_EXPRS.  */
1942   for (j = 0; j < (unsigned) n_elems; j++)
1943     if (deletions[j]
1944 	&& ((unsigned) insertions[j] / deletions[j]) > MAX_GCSE_INSERTION_RATIO)
1945       SET_BIT (prune_exprs, j);
1946 
1947   /* Now prune PRE_INSERT_MAP and PRE_DELETE_MAP based on PRUNE_EXPRS.  */
1948   EXECUTE_IF_SET_IN_SBITMAP (prune_exprs, 0, j, sbi)
1949     {
1950       for (i = 0; i < (unsigned) n_edges; i++)
1951 	RESET_BIT (pre_insert_map[i], j);
1952 
1953       for (i = 0; i < (unsigned) last_basic_block; i++)
1954 	RESET_BIT (pre_delete_map[i], j);
1955     }
1956 
1957   sbitmap_free (prune_exprs);
1958   free (insertions);
1959   free (deletions);
1960 }
1961 
1962 /* Top level routine to do the dataflow analysis needed by PRE.  */
1963 
1964 static struct edge_list *
1965 compute_pre_data (void)
1966 {
1967   struct edge_list *edge_list;
1968   basic_block bb;
1969 
1970   compute_local_properties (transp, comp, antloc, &expr_hash_table);
1971   prune_expressions (true);
1972   sbitmap_vector_zero (ae_kill, last_basic_block);
1973 
1974   /* Compute ae_kill for each basic block using:
1975 
1976      ~(TRANSP | COMP)
1977   */
1978 
1979   FOR_EACH_BB (bb)
1980     {
1981       sbitmap_a_or_b (ae_kill[bb->index], transp[bb->index], comp[bb->index]);
1982       sbitmap_not (ae_kill[bb->index], ae_kill[bb->index]);
1983     }
1984 
1985   edge_list = pre_edge_lcm (expr_hash_table.n_elems, transp, comp, antloc,
1986 			    ae_kill, &pre_insert_map, &pre_delete_map);
1987   sbitmap_vector_free (antloc);
1988   antloc = NULL;
1989   sbitmap_vector_free (ae_kill);
1990   ae_kill = NULL;
1991 
1992   prune_insertions_deletions (expr_hash_table.n_elems);
1993 
1994   return edge_list;
1995 }
1996 
1997 /* PRE utilities */
1998 
1999 /* Return nonzero if an occurrence of expression EXPR in OCCR_BB would reach
2000    block BB.
2001 
2002    VISITED is a pointer to a working buffer for tracking which BB's have
2003    been visited.  It is NULL for the top-level call.
2004 
2005    We treat reaching expressions that go through blocks containing the same
2006    reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
2007    2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2008    2 as not reaching.  The intent is to improve the probability of finding
2009    only one reaching expression and to reduce register lifetimes by picking
2010    the closest such expression.  */
2011 
2012 static int
2013 pre_expr_reaches_here_p_work (basic_block occr_bb, struct expr *expr,
2014 			      basic_block bb, char *visited)
2015 {
2016   edge pred;
2017   edge_iterator ei;
2018 
2019   FOR_EACH_EDGE (pred, ei, bb->preds)
2020     {
2021       basic_block pred_bb = pred->src;
2022 
2023       if (pred->src == ENTRY_BLOCK_PTR
2024 	  /* Has predecessor has already been visited?  */
2025 	  || visited[pred_bb->index])
2026 	;/* Nothing to do.  */
2027 
2028       /* Does this predecessor generate this expression?  */
2029       else if (TEST_BIT (comp[pred_bb->index], expr->bitmap_index))
2030 	{
2031 	  /* Is this the occurrence we're looking for?
2032 	     Note that there's only one generating occurrence per block
2033 	     so we just need to check the block number.  */
2034 	  if (occr_bb == pred_bb)
2035 	    return 1;
2036 
2037 	  visited[pred_bb->index] = 1;
2038 	}
2039       /* Ignore this predecessor if it kills the expression.  */
2040       else if (! TEST_BIT (transp[pred_bb->index], expr->bitmap_index))
2041 	visited[pred_bb->index] = 1;
2042 
2043       /* Neither gen nor kill.  */
2044       else
2045 	{
2046 	  visited[pred_bb->index] = 1;
2047 	  if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
2048 	    return 1;
2049 	}
2050     }
2051 
2052   /* All paths have been checked.  */
2053   return 0;
2054 }
2055 
2056 /* The wrapper for pre_expr_reaches_here_work that ensures that any
2057    memory allocated for that function is returned.  */
2058 
2059 static int
2060 pre_expr_reaches_here_p (basic_block occr_bb, struct expr *expr, basic_block bb)
2061 {
2062   int rval;
2063   char *visited = XCNEWVEC (char, last_basic_block);
2064 
2065   rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited);
2066 
2067   free (visited);
2068   return rval;
2069 }
2070 
2071 /* Generate RTL to copy an EXPR to its `reaching_reg' and return it.  */
2072 
2073 static rtx
2074 process_insert_insn (struct expr *expr)
2075 {
2076   rtx reg = expr->reaching_reg;
2077   /* Copy the expression to make sure we don't have any sharing issues.  */
2078   rtx exp = copy_rtx (expr->expr);
2079   rtx pat;
2080 
2081   start_sequence ();
2082 
2083   /* If the expression is something that's an operand, like a constant,
2084      just copy it to a register.  */
2085   if (general_operand (exp, GET_MODE (reg)))
2086     emit_move_insn (reg, exp);
2087 
2088   /* Otherwise, make a new insn to compute this expression and make sure the
2089      insn will be recognized (this also adds any needed CLOBBERs).  */
2090   else
2091     {
2092       rtx insn = emit_insn (gen_rtx_SET (VOIDmode, reg, exp));
2093 
2094       if (insn_invalid_p (insn))
2095 	gcc_unreachable ();
2096     }
2097 
2098   pat = get_insns ();
2099   end_sequence ();
2100 
2101   return pat;
2102 }
2103 
2104 /* Add EXPR to the end of basic block BB.
2105 
2106    This is used by both the PRE and code hoisting.  */
2107 
2108 static void
2109 insert_insn_end_basic_block (struct expr *expr, basic_block bb)
2110 {
2111   rtx insn = BB_END (bb);
2112   rtx new_insn;
2113   rtx reg = expr->reaching_reg;
2114   int regno = REGNO (reg);
2115   rtx pat, pat_end;
2116 
2117   pat = process_insert_insn (expr);
2118   gcc_assert (pat && INSN_P (pat));
2119 
2120   pat_end = pat;
2121   while (NEXT_INSN (pat_end) != NULL_RTX)
2122     pat_end = NEXT_INSN (pat_end);
2123 
2124   /* If the last insn is a jump, insert EXPR in front [taking care to
2125      handle cc0, etc. properly].  Similarly we need to care trapping
2126      instructions in presence of non-call exceptions.  */
2127 
2128   if (JUMP_P (insn)
2129       || (NONJUMP_INSN_P (insn)
2130 	  && (!single_succ_p (bb)
2131 	      || single_succ_edge (bb)->flags & EDGE_ABNORMAL)))
2132     {
2133 #ifdef HAVE_cc0
2134       rtx note;
2135 #endif
2136 
2137       /* If this is a jump table, then we can't insert stuff here.  Since
2138 	 we know the previous real insn must be the tablejump, we insert
2139 	 the new instruction just before the tablejump.  */
2140       if (GET_CODE (PATTERN (insn)) == ADDR_VEC
2141 	  || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
2142 	insn = prev_active_insn (insn);
2143 
2144 #ifdef HAVE_cc0
2145       /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
2146 	 if cc0 isn't set.  */
2147       note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
2148       if (note)
2149 	insn = XEXP (note, 0);
2150       else
2151 	{
2152 	  rtx maybe_cc0_setter = prev_nonnote_insn (insn);
2153 	  if (maybe_cc0_setter
2154 	      && INSN_P (maybe_cc0_setter)
2155 	      && sets_cc0_p (PATTERN (maybe_cc0_setter)))
2156 	    insn = maybe_cc0_setter;
2157 	}
2158 #endif
2159       /* FIXME: What if something in cc0/jump uses value set in new insn?  */
2160       new_insn = emit_insn_before_noloc (pat, insn, bb);
2161     }
2162 
2163   /* Likewise if the last insn is a call, as will happen in the presence
2164      of exception handling.  */
2165   else if (CALL_P (insn)
2166 	   && (!single_succ_p (bb)
2167 	       || single_succ_edge (bb)->flags & EDGE_ABNORMAL))
2168     {
2169       /* Keeping in mind targets with small register classes and parameters
2170          in registers, we search backward and place the instructions before
2171 	 the first parameter is loaded.  Do this for everyone for consistency
2172 	 and a presumption that we'll get better code elsewhere as well.  */
2173 
2174       /* Since different machines initialize their parameter registers
2175 	 in different orders, assume nothing.  Collect the set of all
2176 	 parameter registers.  */
2177       insn = find_first_parameter_load (insn, BB_HEAD (bb));
2178 
2179       /* If we found all the parameter loads, then we want to insert
2180 	 before the first parameter load.
2181 
2182 	 If we did not find all the parameter loads, then we might have
2183 	 stopped on the head of the block, which could be a CODE_LABEL.
2184 	 If we inserted before the CODE_LABEL, then we would be putting
2185 	 the insn in the wrong basic block.  In that case, put the insn
2186 	 after the CODE_LABEL.  Also, respect NOTE_INSN_BASIC_BLOCK.  */
2187       while (LABEL_P (insn)
2188 	     || NOTE_INSN_BASIC_BLOCK_P (insn))
2189 	insn = NEXT_INSN (insn);
2190 
2191       new_insn = emit_insn_before_noloc (pat, insn, bb);
2192     }
2193   else
2194     new_insn = emit_insn_after_noloc (pat, insn, bb);
2195 
2196   while (1)
2197     {
2198       if (INSN_P (pat))
2199 	add_label_notes (PATTERN (pat), new_insn);
2200       if (pat == pat_end)
2201 	break;
2202       pat = NEXT_INSN (pat);
2203     }
2204 
2205   gcse_create_count++;
2206 
2207   if (dump_file)
2208     {
2209       fprintf (dump_file, "PRE/HOIST: end of bb %d, insn %d, ",
2210 	       bb->index, INSN_UID (new_insn));
2211       fprintf (dump_file, "copying expression %d to reg %d\n",
2212 	       expr->bitmap_index, regno);
2213     }
2214 }
2215 
2216 /* Insert partially redundant expressions on edges in the CFG to make
2217    the expressions fully redundant.  */
2218 
2219 static int
2220 pre_edge_insert (struct edge_list *edge_list, struct expr **index_map)
2221 {
2222   int e, i, j, num_edges, set_size, did_insert = 0;
2223   sbitmap *inserted;
2224 
2225   /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
2226      if it reaches any of the deleted expressions.  */
2227 
2228   set_size = pre_insert_map[0]->size;
2229   num_edges = NUM_EDGES (edge_list);
2230   inserted = sbitmap_vector_alloc (num_edges, expr_hash_table.n_elems);
2231   sbitmap_vector_zero (inserted, num_edges);
2232 
2233   for (e = 0; e < num_edges; e++)
2234     {
2235       int indx;
2236       basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e);
2237 
2238       for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
2239 	{
2240 	  SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
2241 
2242 	  for (j = indx;
2243 	       insert && j < (int) expr_hash_table.n_elems;
2244 	       j++, insert >>= 1)
2245 	    if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
2246 	      {
2247 		struct expr *expr = index_map[j];
2248 		struct occr *occr;
2249 
2250 		/* Now look at each deleted occurrence of this expression.  */
2251 		for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2252 		  {
2253 		    if (! occr->deleted_p)
2254 		      continue;
2255 
2256 		    /* Insert this expression on this edge if it would
2257 		       reach the deleted occurrence in BB.  */
2258 		    if (!TEST_BIT (inserted[e], j))
2259 		      {
2260 			rtx insn;
2261 			edge eg = INDEX_EDGE (edge_list, e);
2262 
2263 			/* We can't insert anything on an abnormal and
2264 			   critical edge, so we insert the insn at the end of
2265 			   the previous block. There are several alternatives
2266 			   detailed in Morgans book P277 (sec 10.5) for
2267 			   handling this situation.  This one is easiest for
2268 			   now.  */
2269 
2270 			if (eg->flags & EDGE_ABNORMAL)
2271 			  insert_insn_end_basic_block (index_map[j], bb);
2272 			else
2273 			  {
2274 			    insn = process_insert_insn (index_map[j]);
2275 			    insert_insn_on_edge (insn, eg);
2276 			  }
2277 
2278 			if (dump_file)
2279 			  {
2280 			    fprintf (dump_file, "PRE: edge (%d,%d), ",
2281 				     bb->index,
2282 				     INDEX_EDGE_SUCC_BB (edge_list, e)->index);
2283 			    fprintf (dump_file, "copy expression %d\n",
2284 				     expr->bitmap_index);
2285 			  }
2286 
2287 			update_ld_motion_stores (expr);
2288 			SET_BIT (inserted[e], j);
2289 			did_insert = 1;
2290 			gcse_create_count++;
2291 		      }
2292 		  }
2293 	      }
2294 	}
2295     }
2296 
2297   sbitmap_vector_free (inserted);
2298   return did_insert;
2299 }
2300 
2301 /* Copy the result of EXPR->EXPR generated by INSN to EXPR->REACHING_REG.
2302    Given "old_reg <- expr" (INSN), instead of adding after it
2303      reaching_reg <- old_reg
2304    it's better to do the following:
2305      reaching_reg <- expr
2306      old_reg      <- reaching_reg
2307    because this way copy propagation can discover additional PRE
2308    opportunities.  But if this fails, we try the old way.
2309    When "expr" is a store, i.e.
2310    given "MEM <- old_reg", instead of adding after it
2311      reaching_reg <- old_reg
2312    it's better to add it before as follows:
2313      reaching_reg <- old_reg
2314      MEM          <- reaching_reg.  */
2315 
2316 static void
2317 pre_insert_copy_insn (struct expr *expr, rtx insn)
2318 {
2319   rtx reg = expr->reaching_reg;
2320   int regno = REGNO (reg);
2321   int indx = expr->bitmap_index;
2322   rtx pat = PATTERN (insn);
2323   rtx set, first_set, new_insn;
2324   rtx old_reg;
2325   int i;
2326 
2327   /* This block matches the logic in hash_scan_insn.  */
2328   switch (GET_CODE (pat))
2329     {
2330     case SET:
2331       set = pat;
2332       break;
2333 
2334     case PARALLEL:
2335       /* Search through the parallel looking for the set whose
2336 	 source was the expression that we're interested in.  */
2337       first_set = NULL_RTX;
2338       set = NULL_RTX;
2339       for (i = 0; i < XVECLEN (pat, 0); i++)
2340 	{
2341 	  rtx x = XVECEXP (pat, 0, i);
2342 	  if (GET_CODE (x) == SET)
2343 	    {
2344 	      /* If the source was a REG_EQUAL or REG_EQUIV note, we
2345 		 may not find an equivalent expression, but in this
2346 		 case the PARALLEL will have a single set.  */
2347 	      if (first_set == NULL_RTX)
2348 		first_set = x;
2349 	      if (expr_equiv_p (SET_SRC (x), expr->expr))
2350 	        {
2351 	          set = x;
2352 	          break;
2353 	        }
2354 	    }
2355 	}
2356 
2357       gcc_assert (first_set);
2358       if (set == NULL_RTX)
2359         set = first_set;
2360       break;
2361 
2362     default:
2363       gcc_unreachable ();
2364     }
2365 
2366   if (REG_P (SET_DEST (set)))
2367     {
2368       old_reg = SET_DEST (set);
2369       /* Check if we can modify the set destination in the original insn.  */
2370       if (validate_change (insn, &SET_DEST (set), reg, 0))
2371         {
2372           new_insn = gen_move_insn (old_reg, reg);
2373           new_insn = emit_insn_after (new_insn, insn);
2374         }
2375       else
2376         {
2377           new_insn = gen_move_insn (reg, old_reg);
2378           new_insn = emit_insn_after (new_insn, insn);
2379         }
2380     }
2381   else /* This is possible only in case of a store to memory.  */
2382     {
2383       old_reg = SET_SRC (set);
2384       new_insn = gen_move_insn (reg, old_reg);
2385 
2386       /* Check if we can modify the set source in the original insn.  */
2387       if (validate_change (insn, &SET_SRC (set), reg, 0))
2388         new_insn = emit_insn_before (new_insn, insn);
2389       else
2390         new_insn = emit_insn_after (new_insn, insn);
2391     }
2392 
2393   gcse_create_count++;
2394 
2395   if (dump_file)
2396     fprintf (dump_file,
2397 	     "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
2398 	      BLOCK_FOR_INSN (insn)->index, INSN_UID (new_insn), indx,
2399 	      INSN_UID (insn), regno);
2400 }
2401 
2402 /* Copy available expressions that reach the redundant expression
2403    to `reaching_reg'.  */
2404 
2405 static void
2406 pre_insert_copies (void)
2407 {
2408   unsigned int i, added_copy;
2409   struct expr *expr;
2410   struct occr *occr;
2411   struct occr *avail;
2412 
2413   /* For each available expression in the table, copy the result to
2414      `reaching_reg' if the expression reaches a deleted one.
2415 
2416      ??? The current algorithm is rather brute force.
2417      Need to do some profiling.  */
2418 
2419   for (i = 0; i < expr_hash_table.size; i++)
2420     for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2421       {
2422 	/* If the basic block isn't reachable, PPOUT will be TRUE.  However,
2423 	   we don't want to insert a copy here because the expression may not
2424 	   really be redundant.  So only insert an insn if the expression was
2425 	   deleted.  This test also avoids further processing if the
2426 	   expression wasn't deleted anywhere.  */
2427 	if (expr->reaching_reg == NULL)
2428 	  continue;
2429 
2430 	/* Set when we add a copy for that expression.  */
2431 	added_copy = 0;
2432 
2433 	for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2434 	  {
2435 	    if (! occr->deleted_p)
2436 	      continue;
2437 
2438 	    for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
2439 	      {
2440 		rtx insn = avail->insn;
2441 
2442 		/* No need to handle this one if handled already.  */
2443 		if (avail->copied_p)
2444 		  continue;
2445 
2446 		/* Don't handle this one if it's a redundant one.  */
2447 		if (INSN_DELETED_P (insn))
2448 		  continue;
2449 
2450 		/* Or if the expression doesn't reach the deleted one.  */
2451 		if (! pre_expr_reaches_here_p (BLOCK_FOR_INSN (avail->insn),
2452 					       expr,
2453 					       BLOCK_FOR_INSN (occr->insn)))
2454 		  continue;
2455 
2456                 added_copy = 1;
2457 
2458 		/* Copy the result of avail to reaching_reg.  */
2459 		pre_insert_copy_insn (expr, insn);
2460 		avail->copied_p = 1;
2461 	      }
2462 	  }
2463 
2464 	  if (added_copy)
2465             update_ld_motion_stores (expr);
2466       }
2467 }
2468 
2469 /* Emit move from SRC to DEST noting the equivalence with expression computed
2470    in INSN.  */
2471 
2472 static rtx
2473 gcse_emit_move_after (rtx dest, rtx src, rtx insn)
2474 {
2475   rtx new_rtx;
2476   rtx set = single_set (insn), set2;
2477   rtx note;
2478   rtx eqv;
2479 
2480   /* This should never fail since we're creating a reg->reg copy
2481      we've verified to be valid.  */
2482 
2483   new_rtx = emit_insn_after (gen_move_insn (dest, src), insn);
2484 
2485   /* Note the equivalence for local CSE pass.  */
2486   set2 = single_set (new_rtx);
2487   if (!set2 || !rtx_equal_p (SET_DEST (set2), dest))
2488     return new_rtx;
2489   if ((note = find_reg_equal_equiv_note (insn)))
2490     eqv = XEXP (note, 0);
2491   else
2492     eqv = SET_SRC (set);
2493 
2494   set_unique_reg_note (new_rtx, REG_EQUAL, copy_insn_1 (eqv));
2495 
2496   return new_rtx;
2497 }
2498 
2499 /* Delete redundant computations.
2500    Deletion is done by changing the insn to copy the `reaching_reg' of
2501    the expression into the result of the SET.  It is left to later passes
2502    (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
2503 
2504    Return nonzero if a change is made.  */
2505 
2506 static int
2507 pre_delete (void)
2508 {
2509   unsigned int i;
2510   int changed;
2511   struct expr *expr;
2512   struct occr *occr;
2513 
2514   changed = 0;
2515   for (i = 0; i < expr_hash_table.size; i++)
2516     for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2517       {
2518 	int indx = expr->bitmap_index;
2519 
2520 	/* We only need to search antic_occr since we require ANTLOC != 0.  */
2521 	for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2522 	  {
2523 	    rtx insn = occr->insn;
2524 	    rtx set;
2525 	    basic_block bb = BLOCK_FOR_INSN (insn);
2526 
2527 	    /* We only delete insns that have a single_set.  */
2528 	    if (TEST_BIT (pre_delete_map[bb->index], indx)
2529 		&& (set = single_set (insn)) != 0
2530                 && dbg_cnt (pre_insn))
2531 	      {
2532 		/* Create a pseudo-reg to store the result of reaching
2533 		   expressions into.  Get the mode for the new pseudo from
2534 		   the mode of the original destination pseudo.  */
2535 		if (expr->reaching_reg == NULL)
2536 		  expr->reaching_reg = gen_reg_rtx_and_attrs (SET_DEST (set));
2537 
2538 		gcse_emit_move_after (SET_DEST (set), expr->reaching_reg, insn);
2539 		delete_insn (insn);
2540 		occr->deleted_p = 1;
2541 		changed = 1;
2542 		gcse_subst_count++;
2543 
2544 		if (dump_file)
2545 		  {
2546 		    fprintf (dump_file,
2547 			     "PRE: redundant insn %d (expression %d) in ",
2548 			       INSN_UID (insn), indx);
2549 		    fprintf (dump_file, "bb %d, reaching reg is %d\n",
2550 			     bb->index, REGNO (expr->reaching_reg));
2551 		  }
2552 	      }
2553 	  }
2554       }
2555 
2556   return changed;
2557 }
2558 
2559 /* Perform GCSE optimizations using PRE.
2560    This is called by one_pre_gcse_pass after all the dataflow analysis
2561    has been done.
2562 
2563    This is based on the original Morel-Renvoise paper Fred Chow's thesis, and
2564    lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
2565    Compiler Design and Implementation.
2566 
2567    ??? A new pseudo reg is created to hold the reaching expression.  The nice
2568    thing about the classical approach is that it would try to use an existing
2569    reg.  If the register can't be adequately optimized [i.e. we introduce
2570    reload problems], one could add a pass here to propagate the new register
2571    through the block.
2572 
2573    ??? We don't handle single sets in PARALLELs because we're [currently] not
2574    able to copy the rest of the parallel when we insert copies to create full
2575    redundancies from partial redundancies.  However, there's no reason why we
2576    can't handle PARALLELs in the cases where there are no partial
2577    redundancies.  */
2578 
2579 static int
2580 pre_gcse (struct edge_list *edge_list)
2581 {
2582   unsigned int i;
2583   int did_insert, changed;
2584   struct expr **index_map;
2585   struct expr *expr;
2586 
2587   /* Compute a mapping from expression number (`bitmap_index') to
2588      hash table entry.  */
2589 
2590   index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
2591   for (i = 0; i < expr_hash_table.size; i++)
2592     for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2593       index_map[expr->bitmap_index] = expr;
2594 
2595   /* Delete the redundant insns first so that
2596      - we know what register to use for the new insns and for the other
2597        ones with reaching expressions
2598      - we know which insns are redundant when we go to create copies  */
2599 
2600   changed = pre_delete ();
2601   did_insert = pre_edge_insert (edge_list, index_map);
2602 
2603   /* In other places with reaching expressions, copy the expression to the
2604      specially allocated pseudo-reg that reaches the redundant expr.  */
2605   pre_insert_copies ();
2606   if (did_insert)
2607     {
2608       commit_edge_insertions ();
2609       changed = 1;
2610     }
2611 
2612   free (index_map);
2613   return changed;
2614 }
2615 
2616 /* Top level routine to perform one PRE GCSE pass.
2617 
2618    Return nonzero if a change was made.  */
2619 
2620 static int
2621 one_pre_gcse_pass (void)
2622 {
2623   int changed = 0;
2624 
2625   gcse_subst_count = 0;
2626   gcse_create_count = 0;
2627 
2628   /* Return if there's nothing to do, or it is too expensive.  */
2629   if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
2630       || is_too_expensive (_("PRE disabled")))
2631     return 0;
2632 
2633   /* We need alias.  */
2634   init_alias_analysis ();
2635 
2636   bytes_used = 0;
2637   gcc_obstack_init (&gcse_obstack);
2638   alloc_gcse_mem ();
2639 
2640   alloc_hash_table (&expr_hash_table);
2641   add_noreturn_fake_exit_edges ();
2642   if (flag_gcse_lm)
2643     compute_ld_motion_mems ();
2644 
2645   compute_hash_table (&expr_hash_table);
2646   if (flag_gcse_lm)
2647     trim_ld_motion_mems ();
2648   if (dump_file)
2649     dump_hash_table (dump_file, "Expression", &expr_hash_table);
2650 
2651   if (expr_hash_table.n_elems > 0)
2652     {
2653       struct edge_list *edge_list;
2654       alloc_pre_mem (last_basic_block, expr_hash_table.n_elems);
2655       edge_list = compute_pre_data ();
2656       changed |= pre_gcse (edge_list);
2657       free_edge_list (edge_list);
2658       free_pre_mem ();
2659     }
2660 
2661   if (flag_gcse_lm)
2662     free_ld_motion_mems ();
2663   remove_fake_exit_edges ();
2664   free_hash_table (&expr_hash_table);
2665 
2666   free_gcse_mem ();
2667   obstack_free (&gcse_obstack, NULL);
2668 
2669   /* We are finished with alias.  */
2670   end_alias_analysis ();
2671 
2672   if (dump_file)
2673     {
2674       fprintf (dump_file, "PRE GCSE of %s, %d basic blocks, %d bytes needed, ",
2675 	       current_function_name (), n_basic_blocks, bytes_used);
2676       fprintf (dump_file, "%d substs, %d insns created\n",
2677 	       gcse_subst_count, gcse_create_count);
2678     }
2679 
2680   return changed;
2681 }
2682 
2683 /* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them
2684    to INSN.  If such notes are added to an insn which references a
2685    CODE_LABEL, the LABEL_NUSES count is incremented.  We have to add
2686    that note, because the following loop optimization pass requires
2687    them.  */
2688 
2689 /* ??? If there was a jump optimization pass after gcse and before loop,
2690    then we would not need to do this here, because jump would add the
2691    necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes.  */
2692 
2693 static void
2694 add_label_notes (rtx x, rtx insn)
2695 {
2696   enum rtx_code code = GET_CODE (x);
2697   int i, j;
2698   const char *fmt;
2699 
2700   if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
2701     {
2702       /* This code used to ignore labels that referred to dispatch tables to
2703 	 avoid flow generating (slightly) worse code.
2704 
2705 	 We no longer ignore such label references (see LABEL_REF handling in
2706 	 mark_jump_label for additional information).  */
2707 
2708       /* There's no reason for current users to emit jump-insns with
2709 	 such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET
2710 	 notes.  */
2711       gcc_assert (!JUMP_P (insn));
2712       add_reg_note (insn, REG_LABEL_OPERAND, XEXP (x, 0));
2713 
2714       if (LABEL_P (XEXP (x, 0)))
2715 	LABEL_NUSES (XEXP (x, 0))++;
2716 
2717       return;
2718     }
2719 
2720   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2721     {
2722       if (fmt[i] == 'e')
2723 	add_label_notes (XEXP (x, i), insn);
2724       else if (fmt[i] == 'E')
2725 	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
2726 	  add_label_notes (XVECEXP (x, i, j), insn);
2727     }
2728 }
2729 
2730 /* Code Hoisting variables and subroutines.  */
2731 
2732 /* Very busy expressions.  */
2733 static sbitmap *hoist_vbein;
2734 static sbitmap *hoist_vbeout;
2735 
2736 /* ??? We could compute post dominators and run this algorithm in
2737    reverse to perform tail merging, doing so would probably be
2738    more effective than the tail merging code in jump.c.
2739 
2740    It's unclear if tail merging could be run in parallel with
2741    code hoisting.  It would be nice.  */
2742 
2743 /* Allocate vars used for code hoisting analysis.  */
2744 
2745 static void
2746 alloc_code_hoist_mem (int n_blocks, int n_exprs)
2747 {
2748   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
2749   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
2750   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
2751 
2752   hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
2753   hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
2754 }
2755 
2756 /* Free vars used for code hoisting analysis.  */
2757 
2758 static void
2759 free_code_hoist_mem (void)
2760 {
2761   sbitmap_vector_free (antloc);
2762   sbitmap_vector_free (transp);
2763   sbitmap_vector_free (comp);
2764 
2765   sbitmap_vector_free (hoist_vbein);
2766   sbitmap_vector_free (hoist_vbeout);
2767 
2768   free_dominance_info (CDI_DOMINATORS);
2769 }
2770 
2771 /* Compute the very busy expressions at entry/exit from each block.
2772 
2773    An expression is very busy if all paths from a given point
2774    compute the expression.  */
2775 
2776 static void
2777 compute_code_hoist_vbeinout (void)
2778 {
2779   int changed, passes;
2780   basic_block bb;
2781 
2782   sbitmap_vector_zero (hoist_vbeout, last_basic_block);
2783   sbitmap_vector_zero (hoist_vbein, last_basic_block);
2784 
2785   passes = 0;
2786   changed = 1;
2787 
2788   while (changed)
2789     {
2790       changed = 0;
2791 
2792       /* We scan the blocks in the reverse order to speed up
2793 	 the convergence.  */
2794       FOR_EACH_BB_REVERSE (bb)
2795 	{
2796 	  if (bb->next_bb != EXIT_BLOCK_PTR)
2797 	    {
2798 	      sbitmap_intersection_of_succs (hoist_vbeout[bb->index],
2799 					     hoist_vbein, bb->index);
2800 
2801 	      /* Include expressions in VBEout that are calculated
2802 		 in BB and available at its end.  */
2803 	      sbitmap_a_or_b (hoist_vbeout[bb->index],
2804 			      hoist_vbeout[bb->index], comp[bb->index]);
2805 	    }
2806 
2807 	  changed |= sbitmap_a_or_b_and_c_cg (hoist_vbein[bb->index],
2808 					      antloc[bb->index],
2809 					      hoist_vbeout[bb->index],
2810 					      transp[bb->index]);
2811 	}
2812 
2813       passes++;
2814     }
2815 
2816   if (dump_file)
2817     {
2818       fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes);
2819 
2820       FOR_EACH_BB (bb)
2821         {
2822 	  fprintf (dump_file, "vbein (%d): ", bb->index);
2823 	  dump_sbitmap_file (dump_file, hoist_vbein[bb->index]);
2824 	  fprintf (dump_file, "vbeout(%d): ", bb->index);
2825 	  dump_sbitmap_file (dump_file, hoist_vbeout[bb->index]);
2826 	}
2827     }
2828 }
2829 
2830 /* Top level routine to do the dataflow analysis needed by code hoisting.  */
2831 
2832 static void
2833 compute_code_hoist_data (void)
2834 {
2835   compute_local_properties (transp, comp, antloc, &expr_hash_table);
2836   prune_expressions (false);
2837   compute_code_hoist_vbeinout ();
2838   calculate_dominance_info (CDI_DOMINATORS);
2839   if (dump_file)
2840     fprintf (dump_file, "\n");
2841 }
2842 
2843 /* Determine if the expression identified by EXPR_INDEX would
2844    reach BB unimpared if it was placed at the end of EXPR_BB.
2845    Stop the search if the expression would need to be moved more
2846    than DISTANCE instructions.
2847 
2848    It's unclear exactly what Muchnick meant by "unimpared".  It seems
2849    to me that the expression must either be computed or transparent in
2850    *every* block in the path(s) from EXPR_BB to BB.  Any other definition
2851    would allow the expression to be hoisted out of loops, even if
2852    the expression wasn't a loop invariant.
2853 
2854    Contrast this to reachability for PRE where an expression is
2855    considered reachable if *any* path reaches instead of *all*
2856    paths.  */
2857 
2858 static int
2859 hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb,
2860 			   char *visited, int distance, int *bb_size)
2861 {
2862   edge pred;
2863   edge_iterator ei;
2864   int visited_allocated_locally = 0;
2865 
2866   /* Terminate the search if distance, for which EXPR is allowed to move,
2867      is exhausted.  */
2868   if (distance > 0)
2869     {
2870       distance -= bb_size[bb->index];
2871 
2872       if (distance <= 0)
2873 	return 0;
2874     }
2875   else
2876     gcc_assert (distance == 0);
2877 
2878   if (visited == NULL)
2879     {
2880       visited_allocated_locally = 1;
2881       visited = XCNEWVEC (char, last_basic_block);
2882     }
2883 
2884   FOR_EACH_EDGE (pred, ei, bb->preds)
2885     {
2886       basic_block pred_bb = pred->src;
2887 
2888       if (pred->src == ENTRY_BLOCK_PTR)
2889 	break;
2890       else if (pred_bb == expr_bb)
2891 	continue;
2892       else if (visited[pred_bb->index])
2893 	continue;
2894 
2895       else if (! TEST_BIT (transp[pred_bb->index], expr_index))
2896 	break;
2897 
2898       /* Not killed.  */
2899       else
2900 	{
2901 	  visited[pred_bb->index] = 1;
2902 	  if (! hoist_expr_reaches_here_p (expr_bb, expr_index, pred_bb,
2903 					   visited, distance, bb_size))
2904 	    break;
2905 	}
2906     }
2907   if (visited_allocated_locally)
2908     free (visited);
2909 
2910   return (pred == NULL);
2911 }
2912 
2913 /* Find occurence in BB.  */
2914 
2915 static struct occr *
2916 find_occr_in_bb (struct occr *occr, basic_block bb)
2917 {
2918   /* Find the right occurrence of this expression.  */
2919   while (occr && BLOCK_FOR_INSN (occr->insn) != bb)
2920     occr = occr->next;
2921 
2922   return occr;
2923 }
2924 
2925 /* Actually perform code hoisting.  */
2926 
2927 static int
2928 hoist_code (void)
2929 {
2930   basic_block bb, dominated;
2931   VEC (basic_block, heap) *dom_tree_walk;
2932   unsigned int dom_tree_walk_index;
2933   VEC (basic_block, heap) *domby;
2934   unsigned int i,j;
2935   struct expr **index_map;
2936   struct expr *expr;
2937   int *to_bb_head;
2938   int *bb_size;
2939   int changed = 0;
2940 
2941   /* Compute a mapping from expression number (`bitmap_index') to
2942      hash table entry.  */
2943 
2944   index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
2945   for (i = 0; i < expr_hash_table.size; i++)
2946     for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2947       index_map[expr->bitmap_index] = expr;
2948 
2949   /* Calculate sizes of basic blocks and note how far
2950      each instruction is from the start of its block.  We then use this
2951      data to restrict distance an expression can travel.  */
2952 
2953   to_bb_head = XCNEWVEC (int, get_max_uid ());
2954   bb_size = XCNEWVEC (int, last_basic_block);
2955 
2956   FOR_EACH_BB (bb)
2957     {
2958       rtx insn;
2959       int to_head;
2960 
2961       to_head = 0;
2962       FOR_BB_INSNS (bb, insn)
2963 	{
2964 	  /* Don't count debug instructions to avoid them affecting
2965 	     decision choices.  */
2966 	  if (NONDEBUG_INSN_P (insn))
2967 	    to_bb_head[INSN_UID (insn)] = to_head++;
2968 	}
2969 
2970       bb_size[bb->index] = to_head;
2971     }
2972 
2973   gcc_assert (EDGE_COUNT (ENTRY_BLOCK_PTR->succs) == 1
2974 	      && (EDGE_SUCC (ENTRY_BLOCK_PTR, 0)->dest
2975 		  == ENTRY_BLOCK_PTR->next_bb));
2976 
2977   dom_tree_walk = get_all_dominated_blocks (CDI_DOMINATORS,
2978 					    ENTRY_BLOCK_PTR->next_bb);
2979 
2980   /* Walk over each basic block looking for potentially hoistable
2981      expressions, nothing gets hoisted from the entry block.  */
2982   FOR_EACH_VEC_ELT (basic_block, dom_tree_walk, dom_tree_walk_index, bb)
2983     {
2984       domby = get_dominated_to_depth (CDI_DOMINATORS, bb, MAX_HOIST_DEPTH);
2985 
2986       if (VEC_length (basic_block, domby) == 0)
2987 	continue;
2988 
2989       /* Examine each expression that is very busy at the exit of this
2990 	 block.  These are the potentially hoistable expressions.  */
2991       for (i = 0; i < hoist_vbeout[bb->index]->n_bits; i++)
2992 	{
2993 	  if (TEST_BIT (hoist_vbeout[bb->index], i))
2994 	    {
2995 	      /* Current expression.  */
2996 	      struct expr *expr = index_map[i];
2997 	      /* Number of occurences of EXPR that can be hoisted to BB.  */
2998 	      int hoistable = 0;
2999 	      /* Basic blocks that have occurences reachable from BB.  */
3000 	      bitmap_head _from_bbs, *from_bbs = &_from_bbs;
3001 	      /* Occurences reachable from BB.  */
3002 	      VEC (occr_t, heap) *occrs_to_hoist = NULL;
3003 	      /* We want to insert the expression into BB only once, so
3004 		 note when we've inserted it.  */
3005 	      int insn_inserted_p;
3006 	      occr_t occr;
3007 
3008 	      bitmap_initialize (from_bbs, 0);
3009 
3010 	      /* If an expression is computed in BB and is available at end of
3011 		 BB, hoist all occurences dominated by BB to BB.  */
3012 	      if (TEST_BIT (comp[bb->index], i))
3013 		{
3014 		  occr = find_occr_in_bb (expr->antic_occr, bb);
3015 
3016 		  if (occr)
3017 		    {
3018 		      /* An occurence might've been already deleted
3019 			 while processing a dominator of BB.  */
3020 		      if (!occr->deleted_p)
3021 			{
3022 			  gcc_assert (NONDEBUG_INSN_P (occr->insn));
3023 			  hoistable++;
3024 			}
3025 		    }
3026 		  else
3027 		    hoistable++;
3028 		}
3029 
3030 	      /* We've found a potentially hoistable expression, now
3031 		 we look at every block BB dominates to see if it
3032 		 computes the expression.  */
3033 	      FOR_EACH_VEC_ELT (basic_block, domby, j, dominated)
3034 		{
3035 		  int max_distance;
3036 
3037 		  /* Ignore self dominance.  */
3038 		  if (bb == dominated)
3039 		    continue;
3040 		  /* We've found a dominated block, now see if it computes
3041 		     the busy expression and whether or not moving that
3042 		     expression to the "beginning" of that block is safe.  */
3043 		  if (!TEST_BIT (antloc[dominated->index], i))
3044 		    continue;
3045 
3046 		  occr = find_occr_in_bb (expr->antic_occr, dominated);
3047 		  gcc_assert (occr);
3048 
3049 		  /* An occurence might've been already deleted
3050 		     while processing a dominator of BB.  */
3051 		  if (occr->deleted_p)
3052 		    continue;
3053 		  gcc_assert (NONDEBUG_INSN_P (occr->insn));
3054 
3055 		  max_distance = expr->max_distance;
3056 		  if (max_distance > 0)
3057 		    /* Adjust MAX_DISTANCE to account for the fact that
3058 		       OCCR won't have to travel all of DOMINATED, but
3059 		       only part of it.  */
3060 		    max_distance += (bb_size[dominated->index]
3061 				     - to_bb_head[INSN_UID (occr->insn)]);
3062 
3063 		  /* Note if the expression would reach the dominated block
3064 		     unimpared if it was placed at the end of BB.
3065 
3066 		     Keep track of how many times this expression is hoistable
3067 		     from a dominated block into BB.  */
3068 		  if (hoist_expr_reaches_here_p (bb, i, dominated, NULL,
3069 						 max_distance, bb_size))
3070 		    {
3071 		      hoistable++;
3072 		      VEC_safe_push (occr_t, heap,
3073 				     occrs_to_hoist, occr);
3074 		      bitmap_set_bit (from_bbs, dominated->index);
3075 		    }
3076 		}
3077 
3078 	      /* If we found more than one hoistable occurrence of this
3079 		 expression, then note it in the vector of expressions to
3080 		 hoist.  It makes no sense to hoist things which are computed
3081 		 in only one BB, and doing so tends to pessimize register
3082 		 allocation.  One could increase this value to try harder
3083 		 to avoid any possible code expansion due to register
3084 		 allocation issues; however experiments have shown that
3085 		 the vast majority of hoistable expressions are only movable
3086 		 from two successors, so raising this threshold is likely
3087 		 to nullify any benefit we get from code hoisting.  */
3088 	      if (hoistable > 1 && dbg_cnt (hoist_insn))
3089 		{
3090 		  /* If (hoistable != VEC_length), then there is
3091 		     an occurence of EXPR in BB itself.  Don't waste
3092 		     time looking for LCA in this case.  */
3093 		  if ((unsigned) hoistable
3094 		      == VEC_length (occr_t, occrs_to_hoist))
3095 		    {
3096 		      basic_block lca;
3097 
3098 		      lca = nearest_common_dominator_for_set (CDI_DOMINATORS,
3099 							      from_bbs);
3100 		      if (lca != bb)
3101 			/* Punt, it's better to hoist these occurences to
3102 			   LCA.  */
3103 			VEC_free (occr_t, heap, occrs_to_hoist);
3104 		    }
3105 		}
3106 	      else
3107 		/* Punt, no point hoisting a single occurence.  */
3108 		VEC_free (occr_t, heap, occrs_to_hoist);
3109 
3110 	      insn_inserted_p = 0;
3111 
3112 	      /* Walk through occurences of I'th expressions we want
3113 		 to hoist to BB and make the transformations.  */
3114 	      FOR_EACH_VEC_ELT (occr_t, occrs_to_hoist, j, occr)
3115 		{
3116 		  rtx insn;
3117 		  rtx set;
3118 
3119 		  gcc_assert (!occr->deleted_p);
3120 
3121 		  insn = occr->insn;
3122 		  set = single_set (insn);
3123 		  gcc_assert (set);
3124 
3125 		  /* Create a pseudo-reg to store the result of reaching
3126 		     expressions into.  Get the mode for the new pseudo
3127 		     from the mode of the original destination pseudo.
3128 
3129 		     It is important to use new pseudos whenever we
3130 		     emit a set.  This will allow reload to use
3131 		     rematerialization for such registers.  */
3132 		  if (!insn_inserted_p)
3133 		    expr->reaching_reg
3134 		      = gen_reg_rtx_and_attrs (SET_DEST (set));
3135 
3136 		  gcse_emit_move_after (SET_DEST (set), expr->reaching_reg,
3137 					insn);
3138 		  delete_insn (insn);
3139 		  occr->deleted_p = 1;
3140 		  changed = 1;
3141 		  gcse_subst_count++;
3142 
3143 		  if (!insn_inserted_p)
3144 		    {
3145 		      insert_insn_end_basic_block (expr, bb);
3146 		      insn_inserted_p = 1;
3147 		    }
3148 		}
3149 
3150 	      VEC_free (occr_t, heap, occrs_to_hoist);
3151 	      bitmap_clear (from_bbs);
3152 	    }
3153 	}
3154       VEC_free (basic_block, heap, domby);
3155     }
3156 
3157   VEC_free (basic_block, heap, dom_tree_walk);
3158   free (bb_size);
3159   free (to_bb_head);
3160   free (index_map);
3161 
3162   return changed;
3163 }
3164 
3165 /* Top level routine to perform one code hoisting (aka unification) pass
3166 
3167    Return nonzero if a change was made.  */
3168 
3169 static int
3170 one_code_hoisting_pass (void)
3171 {
3172   int changed = 0;
3173 
3174   gcse_subst_count = 0;
3175   gcse_create_count = 0;
3176 
3177   /* Return if there's nothing to do, or it is too expensive.  */
3178   if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
3179       || is_too_expensive (_("GCSE disabled")))
3180     return 0;
3181 
3182   doing_code_hoisting_p = true;
3183 
3184   /* We need alias.  */
3185   init_alias_analysis ();
3186 
3187   bytes_used = 0;
3188   gcc_obstack_init (&gcse_obstack);
3189   alloc_gcse_mem ();
3190 
3191   alloc_hash_table (&expr_hash_table);
3192   compute_hash_table (&expr_hash_table);
3193   if (dump_file)
3194     dump_hash_table (dump_file, "Code Hosting Expressions", &expr_hash_table);
3195 
3196   if (expr_hash_table.n_elems > 0)
3197     {
3198       alloc_code_hoist_mem (last_basic_block, expr_hash_table.n_elems);
3199       compute_code_hoist_data ();
3200       changed = hoist_code ();
3201       free_code_hoist_mem ();
3202     }
3203 
3204   free_hash_table (&expr_hash_table);
3205   free_gcse_mem ();
3206   obstack_free (&gcse_obstack, NULL);
3207 
3208   /* We are finished with alias.  */
3209   end_alias_analysis ();
3210 
3211   if (dump_file)
3212     {
3213       fprintf (dump_file, "HOIST of %s, %d basic blocks, %d bytes needed, ",
3214 	       current_function_name (), n_basic_blocks, bytes_used);
3215       fprintf (dump_file, "%d substs, %d insns created\n",
3216 	       gcse_subst_count, gcse_create_count);
3217     }
3218 
3219   doing_code_hoisting_p = false;
3220 
3221   return changed;
3222 }
3223 
3224 /*  Here we provide the things required to do store motion towards the exit.
3225     In order for this to be effective, gcse also needed to be taught how to
3226     move a load when it is killed only by a store to itself.
3227 
3228 	    int i;
3229 	    float a[10];
3230 
3231 	    void foo(float scale)
3232 	    {
3233 	      for (i=0; i<10; i++)
3234 		a[i] *= scale;
3235 	    }
3236 
3237     'i' is both loaded and stored to in the loop. Normally, gcse cannot move
3238     the load out since its live around the loop, and stored at the bottom
3239     of the loop.
3240 
3241       The 'Load Motion' referred to and implemented in this file is
3242     an enhancement to gcse which when using edge based LCM, recognizes
3243     this situation and allows gcse to move the load out of the loop.
3244 
3245       Once gcse has hoisted the load, store motion can then push this
3246     load towards the exit, and we end up with no loads or stores of 'i'
3247     in the loop.  */
3248 
3249 static hashval_t
3250 pre_ldst_expr_hash (const void *p)
3251 {
3252   int do_not_record_p = 0;
3253   const struct ls_expr *const x = (const struct ls_expr *) p;
3254   return
3255     hash_rtx (x->pattern, GET_MODE (x->pattern), &do_not_record_p, NULL, false);
3256 }
3257 
3258 static int
3259 pre_ldst_expr_eq (const void *p1, const void *p2)
3260 {
3261   const struct ls_expr *const ptr1 = (const struct ls_expr *) p1,
3262     *const ptr2 = (const struct ls_expr *) p2;
3263   return expr_equiv_p (ptr1->pattern, ptr2->pattern);
3264 }
3265 
3266 /* This will search the ldst list for a matching expression. If it
3267    doesn't find one, we create one and initialize it.  */
3268 
3269 static struct ls_expr *
3270 ldst_entry (rtx x)
3271 {
3272   int do_not_record_p = 0;
3273   struct ls_expr * ptr;
3274   unsigned int hash;
3275   void **slot;
3276   struct ls_expr e;
3277 
3278   hash = hash_rtx (x, GET_MODE (x), &do_not_record_p,
3279 		   NULL,  /*have_reg_qty=*/false);
3280 
3281   e.pattern = x;
3282   slot = htab_find_slot_with_hash (pre_ldst_table, &e, hash, INSERT);
3283   if (*slot)
3284     return (struct ls_expr *)*slot;
3285 
3286   ptr = XNEW (struct ls_expr);
3287 
3288   ptr->next         = pre_ldst_mems;
3289   ptr->expr         = NULL;
3290   ptr->pattern      = x;
3291   ptr->pattern_regs = NULL_RTX;
3292   ptr->loads        = NULL_RTX;
3293   ptr->stores       = NULL_RTX;
3294   ptr->reaching_reg = NULL_RTX;
3295   ptr->invalid      = 0;
3296   ptr->index        = 0;
3297   ptr->hash_index   = hash;
3298   pre_ldst_mems     = ptr;
3299   *slot = ptr;
3300 
3301   return ptr;
3302 }
3303 
3304 /* Free up an individual ldst entry.  */
3305 
3306 static void
3307 free_ldst_entry (struct ls_expr * ptr)
3308 {
3309   free_INSN_LIST_list (& ptr->loads);
3310   free_INSN_LIST_list (& ptr->stores);
3311 
3312   free (ptr);
3313 }
3314 
3315 /* Free up all memory associated with the ldst list.  */
3316 
3317 static void
3318 free_ld_motion_mems (void)
3319 {
3320   if (pre_ldst_table)
3321     htab_delete (pre_ldst_table);
3322   pre_ldst_table = NULL;
3323 
3324   while (pre_ldst_mems)
3325     {
3326       struct ls_expr * tmp = pre_ldst_mems;
3327 
3328       pre_ldst_mems = pre_ldst_mems->next;
3329 
3330       free_ldst_entry (tmp);
3331     }
3332 
3333   pre_ldst_mems = NULL;
3334 }
3335 
3336 /* Dump debugging info about the ldst list.  */
3337 
3338 static void
3339 print_ldst_list (FILE * file)
3340 {
3341   struct ls_expr * ptr;
3342 
3343   fprintf (file, "LDST list: \n");
3344 
3345   for (ptr = pre_ldst_mems; ptr != NULL; ptr = ptr->next)
3346     {
3347       fprintf (file, "  Pattern (%3d): ", ptr->index);
3348 
3349       print_rtl (file, ptr->pattern);
3350 
3351       fprintf (file, "\n	 Loads : ");
3352 
3353       if (ptr->loads)
3354 	print_rtl (file, ptr->loads);
3355       else
3356 	fprintf (file, "(nil)");
3357 
3358       fprintf (file, "\n	Stores : ");
3359 
3360       if (ptr->stores)
3361 	print_rtl (file, ptr->stores);
3362       else
3363 	fprintf (file, "(nil)");
3364 
3365       fprintf (file, "\n\n");
3366     }
3367 
3368   fprintf (file, "\n");
3369 }
3370 
3371 /* Returns 1 if X is in the list of ldst only expressions.  */
3372 
3373 static struct ls_expr *
3374 find_rtx_in_ldst (rtx x)
3375 {
3376   struct ls_expr e;
3377   void **slot;
3378   if (!pre_ldst_table)
3379     return NULL;
3380   e.pattern = x;
3381   slot = htab_find_slot (pre_ldst_table, &e, NO_INSERT);
3382   if (!slot || ((struct ls_expr *)*slot)->invalid)
3383     return NULL;
3384   return (struct ls_expr *) *slot;
3385 }
3386 
3387 /* Load Motion for loads which only kill themselves.  */
3388 
3389 /* Return true if x, a MEM, is a simple access with no side effects.
3390    These are the types of loads we consider for the ld_motion list,
3391    otherwise we let the usual aliasing take care of it.  */
3392 
3393 static int
3394 simple_mem (const_rtx x)
3395 {
3396   if (MEM_VOLATILE_P (x))
3397     return 0;
3398 
3399   if (GET_MODE (x) == BLKmode)
3400     return 0;
3401 
3402   /* If we are handling exceptions, we must be careful with memory references
3403      that may trap.  If we are not, the behavior is undefined, so we may just
3404      continue.  */
3405   if (cfun->can_throw_non_call_exceptions && may_trap_p (x))
3406     return 0;
3407 
3408   if (side_effects_p (x))
3409     return 0;
3410 
3411   /* Do not consider function arguments passed on stack.  */
3412   if (reg_mentioned_p (stack_pointer_rtx, x))
3413     return 0;
3414 
3415   if (flag_float_store && FLOAT_MODE_P (GET_MODE (x)))
3416     return 0;
3417 
3418   return 1;
3419 }
3420 
3421 /* Make sure there isn't a buried reference in this pattern anywhere.
3422    If there is, invalidate the entry for it since we're not capable
3423    of fixing it up just yet.. We have to be sure we know about ALL
3424    loads since the aliasing code will allow all entries in the
3425    ld_motion list to not-alias itself.  If we miss a load, we will get
3426    the wrong value since gcse might common it and we won't know to
3427    fix it up.  */
3428 
3429 static void
3430 invalidate_any_buried_refs (rtx x)
3431 {
3432   const char * fmt;
3433   int i, j;
3434   struct ls_expr * ptr;
3435 
3436   /* Invalidate it in the list.  */
3437   if (MEM_P (x) && simple_mem (x))
3438     {
3439       ptr = ldst_entry (x);
3440       ptr->invalid = 1;
3441     }
3442 
3443   /* Recursively process the insn.  */
3444   fmt = GET_RTX_FORMAT (GET_CODE (x));
3445 
3446   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3447     {
3448       if (fmt[i] == 'e')
3449 	invalidate_any_buried_refs (XEXP (x, i));
3450       else if (fmt[i] == 'E')
3451 	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3452 	  invalidate_any_buried_refs (XVECEXP (x, i, j));
3453     }
3454 }
3455 
3456 /* Find all the 'simple' MEMs which are used in LOADs and STORES.  Simple
3457    being defined as MEM loads and stores to symbols, with no side effects
3458    and no registers in the expression.  For a MEM destination, we also
3459    check that the insn is still valid if we replace the destination with a
3460    REG, as is done in update_ld_motion_stores.  If there are any uses/defs
3461    which don't match this criteria, they are invalidated and trimmed out
3462    later.  */
3463 
3464 static void
3465 compute_ld_motion_mems (void)
3466 {
3467   struct ls_expr * ptr;
3468   basic_block bb;
3469   rtx insn;
3470 
3471   pre_ldst_mems = NULL;
3472   pre_ldst_table
3473     = htab_create (13, pre_ldst_expr_hash, pre_ldst_expr_eq, NULL);
3474 
3475   FOR_EACH_BB (bb)
3476     {
3477       FOR_BB_INSNS (bb, insn)
3478 	{
3479 	  if (NONDEBUG_INSN_P (insn))
3480 	    {
3481 	      if (GET_CODE (PATTERN (insn)) == SET)
3482 		{
3483 		  rtx src = SET_SRC (PATTERN (insn));
3484 		  rtx dest = SET_DEST (PATTERN (insn));
3485 
3486 		  /* Check for a simple LOAD...  */
3487 		  if (MEM_P (src) && simple_mem (src))
3488 		    {
3489 		      ptr = ldst_entry (src);
3490 		      if (REG_P (dest))
3491 			ptr->loads = alloc_INSN_LIST (insn, ptr->loads);
3492 		      else
3493 			ptr->invalid = 1;
3494 		    }
3495 		  else
3496 		    {
3497 		      /* Make sure there isn't a buried load somewhere.  */
3498 		      invalidate_any_buried_refs (src);
3499 		    }
3500 
3501 		  /* Check for stores. Don't worry about aliased ones, they
3502 		     will block any movement we might do later. We only care
3503 		     about this exact pattern since those are the only
3504 		     circumstance that we will ignore the aliasing info.  */
3505 		  if (MEM_P (dest) && simple_mem (dest))
3506 		    {
3507 		      ptr = ldst_entry (dest);
3508 
3509 		      if (! MEM_P (src)
3510 			  && GET_CODE (src) != ASM_OPERANDS
3511 			  /* Check for REG manually since want_to_gcse_p
3512 			     returns 0 for all REGs.  */
3513 			  && can_assign_to_reg_without_clobbers_p (src))
3514 			ptr->stores = alloc_INSN_LIST (insn, ptr->stores);
3515 		      else
3516 			ptr->invalid = 1;
3517 		    }
3518 		}
3519 	      else
3520 		invalidate_any_buried_refs (PATTERN (insn));
3521 	    }
3522 	}
3523     }
3524 }
3525 
3526 /* Remove any references that have been either invalidated or are not in the
3527    expression list for pre gcse.  */
3528 
3529 static void
3530 trim_ld_motion_mems (void)
3531 {
3532   struct ls_expr * * last = & pre_ldst_mems;
3533   struct ls_expr * ptr = pre_ldst_mems;
3534 
3535   while (ptr != NULL)
3536     {
3537       struct expr * expr;
3538 
3539       /* Delete if entry has been made invalid.  */
3540       if (! ptr->invalid)
3541 	{
3542 	  /* Delete if we cannot find this mem in the expression list.  */
3543 	  unsigned int hash = ptr->hash_index % expr_hash_table.size;
3544 
3545 	  for (expr = expr_hash_table.table[hash];
3546 	       expr != NULL;
3547 	       expr = expr->next_same_hash)
3548 	    if (expr_equiv_p (expr->expr, ptr->pattern))
3549 	      break;
3550 	}
3551       else
3552 	expr = (struct expr *) 0;
3553 
3554       if (expr)
3555 	{
3556 	  /* Set the expression field if we are keeping it.  */
3557 	  ptr->expr = expr;
3558 	  last = & ptr->next;
3559 	  ptr = ptr->next;
3560 	}
3561       else
3562 	{
3563 	  *last = ptr->next;
3564 	  htab_remove_elt_with_hash (pre_ldst_table, ptr, ptr->hash_index);
3565 	  free_ldst_entry (ptr);
3566 	  ptr = * last;
3567 	}
3568     }
3569 
3570   /* Show the world what we've found.  */
3571   if (dump_file && pre_ldst_mems != NULL)
3572     print_ldst_list (dump_file);
3573 }
3574 
3575 /* This routine will take an expression which we are replacing with
3576    a reaching register, and update any stores that are needed if
3577    that expression is in the ld_motion list.  Stores are updated by
3578    copying their SRC to the reaching register, and then storing
3579    the reaching register into the store location. These keeps the
3580    correct value in the reaching register for the loads.  */
3581 
3582 static void
3583 update_ld_motion_stores (struct expr * expr)
3584 {
3585   struct ls_expr * mem_ptr;
3586 
3587   if ((mem_ptr = find_rtx_in_ldst (expr->expr)))
3588     {
3589       /* We can try to find just the REACHED stores, but is shouldn't
3590 	 matter to set the reaching reg everywhere...  some might be
3591 	 dead and should be eliminated later.  */
3592 
3593       /* We replace (set mem expr) with (set reg expr) (set mem reg)
3594 	 where reg is the reaching reg used in the load.  We checked in
3595 	 compute_ld_motion_mems that we can replace (set mem expr) with
3596 	 (set reg expr) in that insn.  */
3597       rtx list = mem_ptr->stores;
3598 
3599       for ( ; list != NULL_RTX; list = XEXP (list, 1))
3600 	{
3601 	  rtx insn = XEXP (list, 0);
3602 	  rtx pat = PATTERN (insn);
3603 	  rtx src = SET_SRC (pat);
3604 	  rtx reg = expr->reaching_reg;
3605 	  rtx copy;
3606 
3607 	  /* If we've already copied it, continue.  */
3608 	  if (expr->reaching_reg == src)
3609 	    continue;
3610 
3611 	  if (dump_file)
3612 	    {
3613 	      fprintf (dump_file, "PRE:  store updated with reaching reg ");
3614 	      print_rtl (dump_file, reg);
3615 	      fprintf (dump_file, ":\n	");
3616 	      print_inline_rtx (dump_file, insn, 8);
3617 	      fprintf (dump_file, "\n");
3618 	    }
3619 
3620 	  copy = gen_move_insn (reg, copy_rtx (SET_SRC (pat)));
3621 	  emit_insn_before (copy, insn);
3622 	  SET_SRC (pat) = reg;
3623 	  df_insn_rescan (insn);
3624 
3625 	  /* un-recognize this pattern since it's probably different now.  */
3626 	  INSN_CODE (insn) = -1;
3627 	  gcse_create_count++;
3628 	}
3629     }
3630 }
3631 
3632 /* Return true if the graph is too expensive to optimize. PASS is the
3633    optimization about to be performed.  */
3634 
3635 static bool
3636 is_too_expensive (const char *pass)
3637 {
3638   /* Trying to perform global optimizations on flow graphs which have
3639      a high connectivity will take a long time and is unlikely to be
3640      particularly useful.
3641 
3642      In normal circumstances a cfg should have about twice as many
3643      edges as blocks.  But we do not want to punish small functions
3644      which have a couple switch statements.  Rather than simply
3645      threshold the number of blocks, uses something with a more
3646      graceful degradation.  */
3647   if (n_edges > 20000 + n_basic_blocks * 4)
3648     {
3649       warning (OPT_Wdisabled_optimization,
3650 	       "%s: %d basic blocks and %d edges/basic block",
3651 	       pass, n_basic_blocks, n_edges / n_basic_blocks);
3652 
3653       return true;
3654     }
3655 
3656   /* If allocating memory for the dataflow bitmaps would take up too much
3657      storage it's better just to disable the optimization.  */
3658   if ((n_basic_blocks
3659        * SBITMAP_SET_SIZE (max_reg_num ())
3660        * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
3661     {
3662       warning (OPT_Wdisabled_optimization,
3663 	       "%s: %d basic blocks and %d registers",
3664 	       pass, n_basic_blocks, max_reg_num ());
3665 
3666       return true;
3667     }
3668 
3669   return false;
3670 }
3671 
3672 /* All the passes implemented in this file.  Each pass has its
3673    own gate and execute function, and at the end of the file a
3674    pass definition for passes.c.
3675 
3676    We do not construct an accurate cfg in functions which call
3677    setjmp, so none of these passes runs if the function calls
3678    setjmp.
3679    FIXME: Should just handle setjmp via REG_SETJMP notes.  */
3680 
3681 static bool
3682 gate_rtl_pre (void)
3683 {
3684   return optimize > 0 && flag_gcse
3685     && !cfun->calls_setjmp
3686     && optimize_function_for_speed_p (cfun)
3687     && dbg_cnt (pre);
3688 }
3689 
3690 static unsigned int
3691 execute_rtl_pre (void)
3692 {
3693   int changed;
3694   delete_unreachable_blocks ();
3695   df_analyze ();
3696   changed = one_pre_gcse_pass ();
3697   flag_rerun_cse_after_global_opts |= changed;
3698   if (changed)
3699     cleanup_cfg (0);
3700   return 0;
3701 }
3702 
3703 static bool
3704 gate_rtl_hoist (void)
3705 {
3706   return optimize > 0 && flag_gcse
3707     && !cfun->calls_setjmp
3708     /* It does not make sense to run code hoisting unless we are optimizing
3709        for code size -- it rarely makes programs faster, and can make then
3710        bigger if we did PRE (when optimizing for space, we don't run PRE).  */
3711     && optimize_function_for_size_p (cfun)
3712     && dbg_cnt (hoist);
3713 }
3714 
3715 static unsigned int
3716 execute_rtl_hoist (void)
3717 {
3718   int changed;
3719   delete_unreachable_blocks ();
3720   df_analyze ();
3721   changed = one_code_hoisting_pass ();
3722   flag_rerun_cse_after_global_opts |= changed;
3723   if (changed)
3724     cleanup_cfg (0);
3725   return 0;
3726 }
3727 
3728 struct rtl_opt_pass pass_rtl_pre =
3729 {
3730  {
3731   RTL_PASS,
3732   "rtl pre",                            /* name */
3733   gate_rtl_pre,                         /* gate */
3734   execute_rtl_pre,    			/* execute */
3735   NULL,                                 /* sub */
3736   NULL,                                 /* next */
3737   0,                                    /* static_pass_number */
3738   TV_PRE,                               /* tv_id */
3739   PROP_cfglayout,                       /* properties_required */
3740   0,                                    /* properties_provided */
3741   0,                                    /* properties_destroyed */
3742   0,                                    /* todo_flags_start */
3743   TODO_df_finish | TODO_verify_rtl_sharing |
3744   TODO_verify_flow | TODO_ggc_collect   /* todo_flags_finish */
3745  }
3746 };
3747 
3748 struct rtl_opt_pass pass_rtl_hoist =
3749 {
3750  {
3751   RTL_PASS,
3752   "hoist",                              /* name */
3753   gate_rtl_hoist,                       /* gate */
3754   execute_rtl_hoist,  			/* execute */
3755   NULL,                                 /* sub */
3756   NULL,                                 /* next */
3757   0,                                    /* static_pass_number */
3758   TV_HOIST,                             /* tv_id */
3759   PROP_cfglayout,                       /* properties_required */
3760   0,                                    /* properties_provided */
3761   0,                                    /* properties_destroyed */
3762   0,                                    /* todo_flags_start */
3763   TODO_df_finish | TODO_verify_rtl_sharing |
3764   TODO_verify_flow | TODO_ggc_collect   /* todo_flags_finish */
3765  }
3766 };
3767 
3768 #include "gt-gcse.h"
3769