1 /* Partial redundancy elimination / Hoisting for RTL.
2    Copyright (C) 1997-2013 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 /* TODO
21    - reordering of memory allocation and freeing to be more space efficient
22    - calc rough register pressure information and use the info to drive all
23      kinds of code motion (including code hoisting) in a unified way.
24 */
25 
26 /* References searched while implementing this.
27 
28    Compilers Principles, Techniques and Tools
29    Aho, Sethi, Ullman
30    Addison-Wesley, 1988
31 
32    Global Optimization by Suppression of Partial Redundancies
33    E. Morel, C. Renvoise
34    Communications of the ACM, Vol. 22, Num. 2, Feb. 1979
35 
36    A Portable Machine-Independent Global Optimizer - Design and Measurements
37    Frederick Chow
38    Stanford Ph.D. thesis, Dec. 1983
39 
40    A Fast Algorithm for Code Movement Optimization
41    D.M. Dhamdhere
42    SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
43 
44    A Solution to a Problem with Morel and Renvoise's
45    Global Optimization by Suppression of Partial Redundancies
46    K-H Drechsler, M.P. Stadel
47    ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
48 
49    Practical Adaptation of the Global Optimization
50    Algorithm of Morel and Renvoise
51    D.M. Dhamdhere
52    ACM TOPLAS, Vol. 13, Num. 2, Apr. 1991
53 
54    Efficiently Computing Static Single Assignment Form and the Control
55    Dependence Graph
56    R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
57    ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
58 
59    Lazy Code Motion
60    J. Knoop, O. Ruthing, B. Steffen
61    ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
62 
63    What's In a Region?  Or Computing Control Dependence Regions in Near-Linear
64    Time for Reducible Flow Control
65    Thomas Ball
66    ACM Letters on Programming Languages and Systems,
67    Vol. 2, Num. 1-4, Mar-Dec 1993
68 
69    An Efficient Representation for Sparse Sets
70    Preston Briggs, Linda Torczon
71    ACM Letters on Programming Languages and Systems,
72    Vol. 2, Num. 1-4, Mar-Dec 1993
73 
74    A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
75    K-H Drechsler, M.P. Stadel
76    ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
77 
78    Partial Dead Code Elimination
79    J. Knoop, O. Ruthing, B. Steffen
80    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
81 
82    Effective Partial Redundancy Elimination
83    P. Briggs, K.D. Cooper
84    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
85 
86    The Program Structure Tree: Computing Control Regions in Linear Time
87    R. Johnson, D. Pearson, K. Pingali
88    ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
89 
90    Optimal Code Motion: Theory and Practice
91    J. Knoop, O. Ruthing, B. Steffen
92    ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
93 
94    The power of assignment motion
95    J. Knoop, O. Ruthing, B. Steffen
96    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
97 
98    Global code motion / global value numbering
99    C. Click
100    ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
101 
102    Value Driven Redundancy Elimination
103    L.T. Simpson
104    Rice University Ph.D. thesis, Apr. 1996
105 
106    Value Numbering
107    L.T. Simpson
108    Massively Scalar Compiler Project, Rice University, Sep. 1996
109 
110    High Performance Compilers for Parallel Computing
111    Michael Wolfe
112    Addison-Wesley, 1996
113 
114    Advanced Compiler Design and Implementation
115    Steven Muchnick
116    Morgan Kaufmann, 1997
117 
118    Building an Optimizing Compiler
119    Robert Morgan
120    Digital Press, 1998
121 
122    People wishing to speed up the code here should read:
123      Elimination Algorithms for Data Flow Analysis
124      B.G. Ryder, M.C. Paull
125      ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
126 
127      How to Analyze Large Programs Efficiently and Informatively
128      D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
129      ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
130 
131    People wishing to do something different can find various possibilities
132    in the above papers and elsewhere.
133 */
134 
135 #include "config.h"
136 #include "system.h"
137 #include "coretypes.h"
138 #include "tm.h"
139 #include "diagnostic-core.h"
140 #include "toplev.h"
141 
142 #include "hard-reg-set.h"
143 #include "rtl.h"
144 #include "tree.h"
145 #include "tm_p.h"
146 #include "regs.h"
147 #include "ira.h"
148 #include "flags.h"
149 #include "insn-config.h"
150 #include "recog.h"
151 #include "basic-block.h"
152 #include "function.h"
153 #include "expr.h"
154 #include "except.h"
155 #include "ggc.h"
156 #include "params.h"
157 #include "cselib.h"
158 #include "intl.h"
159 #include "obstack.h"
160 #include "tree-pass.h"
161 #include "hashtab.h"
162 #include "df.h"
163 #include "dbgcnt.h"
164 #include "target.h"
165 #include "gcse.h"
166 
167 /* We support GCSE via Partial Redundancy Elimination.  PRE optimizations
168    are a superset of those done by classic GCSE.
169 
170    Two passes of copy/constant propagation are done around PRE or hoisting
171    because the first one enables more GCSE and the second one helps to clean
172    up the copies that PRE and HOIST create.  This is needed more for PRE than
173    for HOIST because code hoisting will try to use an existing register
174    containing the common subexpression rather than create a new one.  This is
175    harder to do for PRE because of the code motion (which HOIST doesn't do).
176 
177    Expressions we are interested in GCSE-ing are of the form
178    (set (pseudo-reg) (expression)).
179    Function want_to_gcse_p says what these are.
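
   For instance (a sketch with made-up register numbers), an insn whose
   pattern is
     (set (reg:SI 100) (plus:SI (reg:SI 101) (reg:SI 102)))
   is a candidate: the PLUS source expression is what gets entered in the
   expression hash table.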
180 
181    In addition, expressions in REG_EQUAL notes are candidates for GCSE-ing.
182    This allows PRE to hoist expressions that are expressed in multiple insns,
183    such as complex address calculations (e.g. for PIC code, or loads with a
184    high part and a low part).
185 
186    PRE handles moving invariant expressions out of loops (by treating them as
187    partially redundant).
188 
189    **********************
190 
191    We used to support multiple passes but there are diminishing returns in
192    doing so.  The first pass usually makes 90% of the changes that are doable.
193    A second pass can make a few more changes made possible by the first pass.
194    Experiments show any further passes don't make enough changes to justify
195    the expense.
196 
197    A study of spec92 using an unlimited number of passes:
198    [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
199    [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
200    [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
201 
202    It was found that doing copy propagation between passes enables further
203    substitutions.
204 
205    This study was done before expressions in REG_EQUAL notes were added as
206    candidate expressions for optimization, and before the GIMPLE optimizers
207    were added.  Multiple passes are probably even less beneficial now than
208    they were at the time the study was conducted.
209 
210    PRE is quite expensive in complicated functions because the DFA can take
211    a while to converge.  Hence we only perform one pass.
212 
213    **********************
214 
215    The steps for PRE are:
216 
217    1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
218 
219    2) Perform the data flow analysis for PRE.
220 
221    3) Delete the redundant instructions
222 
223    4) Insert the required copies [if any] that make the partially
224       redundant instructions fully redundant.
225 
226    5) For other reaching expressions, insert an instruction to copy the value
227       to a newly created pseudo that will reach the redundant instruction.
228 
229    The deletion is done first so that when we do insertions we
230    know which pseudo reg to use.
231 
232    Various papers have argued that PRE DFA is expensive (O(n^2)) and others
233    argue it is not.  The number of iterations for the algorithm to converge
234    is typically 2-4 so I don't view it as that expensive (relatively speaking).
235 
236    PRE GCSE depends heavily on the second CPROP pass to clean up the copies
237    we create.  To make an expression reach the place where it's redundant,
238    the result of the expression is copied to a new register, and the redundant
239    expression is deleted by replacing it with this new register.  Classic GCSE
240    doesn't have this problem as much as it computes the reaching defs of
241    each register in each block and thus can try to use an existing
242    register.  */
243 
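/* A rough sketch of the transformation described above (hypothetical code,
   invented variable names; not taken from any test case):

       if (c)                          if (c)
         x = a + b;                      x = a + b, t = x;
       else                     ==>    else
         d = 1;                          d = 1, t = a + b;
       y = a + b;                      y = t;

   The computation of a + b feeding Y is redundant only along the THEN arm.
   A copy T = X is added after the existing computation (step 5), a fresh
   computation of a + b into T is inserted on the ELSE arm (step 4), and the
   now fully redundant computation feeding Y is deleted and replaced by a copy
   from the new pseudo T, the "reaching_reg" recorded in struct expr (step 3).  */
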
244 /* GCSE global vars.  */
245 
246 struct target_gcse default_target_gcse;
247 #if SWITCHABLE_TARGET
248 struct target_gcse *this_target_gcse = &default_target_gcse;
249 #endif
250 
251 /* Set to non-zero if CSE should run after all GCSE optimizations are done.  */
252 int flag_rerun_cse_after_global_opts;
253 
254 /* An obstack for our working variables.  */
255 static struct obstack gcse_obstack;
256 
257 /* Hash table of expressions.  */
258 
259 struct expr
260 {
261   /* The expression.  */
262   rtx expr;
263   /* Index in the available expression bitmaps.  */
264   int bitmap_index;
265   /* Next entry with the same hash.  */
266   struct expr *next_same_hash;
267   /* List of anticipatable occurrences in basic blocks in the function.
268      An "anticipatable occurrence" is one that is the first occurrence in the
269      basic block, the operands are not modified in the basic block prior
270      to the occurrence and the output is not used between the start of
271      the block and the occurrence.  */
272   struct occr *antic_occr;
273   /* List of available occurrences in basic blocks in the function.
274      An "available occurrence" is one that is the last occurrence in the
275      basic block and the operands are not modified by following statements in
276      the basic block [including this insn].  */
277   struct occr *avail_occr;
278   /* Non-null if the computation is PRE redundant.
279      The value is the newly created pseudo-reg to record a copy of the
280      expression in all the places that reach the redundant copy.  */
281   rtx reaching_reg;
282   /* Maximum distance in instructions this expression can travel.
283      We avoid moving simple expressions for more than a few instructions
284      to keep register pressure under control.
285      A value of "0" removes restrictions on how far the expression can
286      travel.  */
287   int max_distance;
288 };
289 
290 /* Occurrence of an expression.
291    There is one per basic block.  If a pattern appears more than once the
292    last appearance is used [or first for anticipatable expressions].  */
293 
294 struct occr
295 {
296   /* Next occurrence of this expression.  */
297   struct occr *next;
298   /* The insn that computes the expression.  */
299   rtx insn;
300   /* Nonzero if this [anticipatable] occurrence has been deleted.  */
301   char deleted_p;
302   /* Nonzero if this [available] occurrence has been copied to
303      reaching_reg.  */
304   /* ??? This is mutually exclusive with deleted_p, so they could share
305      the same byte.  */
306   char copied_p;
307 };
308 
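/* An illustration of the two occurrence kinds (a hypothetical block with
   invented names):

     x = a + b;     <- anticipatable occurrence of (plus a b): first in the
                       block, with A and B unmodified before it.
     a = c;
     y = a + b;     <- available occurrence: last in the block, with A and B
                       unmodified after it.

   The first occurrence is not available (A is clobbered afterwards), and the
   second is not anticipatable (A changes before it is reached).  */
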
309 typedef struct occr *occr_t;
310 
311 /* Expression hash tables.
312    Each hash table is an array of buckets.
313    ??? It is known that if it were an array of entries, structure elements
314    `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
315    not clear whether in the final analysis a sufficient amount of memory would
316    be saved as the size of the available expression bitmaps would be larger
317    [one could build a mapping table without holes afterwards though].
318    Someday I'll perform the computation and figure it out.  */
319 
320 struct hash_table_d
321 {
322   /* The table itself.
323      This is an array of `expr_hash_table_size' elements.  */
324   struct expr **table;
325 
326   /* Size of the hash table, in elements.  */
327   unsigned int size;
328 
329   /* Number of hash table elements.  */
330   unsigned int n_elems;
331 };
332 
333 /* Expression hash table.  */
334 static struct hash_table_d expr_hash_table;
335 
336 /* This is a list of expressions which are MEMs and will be used by load
337    or store motion.
338    Load motion tracks MEMs which aren't killed by anything except themselves,
339    i.e. loads and stores to a single location.
340    We can then allow movement of these MEM refs with a little special
341    allowance (all stores copy the same value to the reaching reg used
342    for the loads).  This means all values used to store into memory must have
343    no side effects so we can re-issue the setter value.  */
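
/* For instance (a rough illustration with a hypothetical global "g"):
     (set (reg:SI 100) (mem:SI (symbol_ref:SI ("g"))))
     (set (mem:SI (symbol_ref:SI ("g"))) (reg:SI 101))
   Such a MEM, accessed only by plain loads and stores of the same location,
   can be tracked here; each store also copies its value into the reaching
   register so later loads of "g" can be replaced by that register.  */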
344 
345 struct ls_expr
346 {
347   struct expr * expr;		/* Gcse expression reference for LM.  */
348   rtx pattern;			/* Pattern of this mem.  */
349   rtx pattern_regs;		/* List of registers mentioned by the mem.  */
350   rtx loads;			/* INSN list of loads seen.  */
351   rtx stores;			/* INSN list of stores seen.  */
352   struct ls_expr * next;	/* Next in the list.  */
353   int invalid;			/* Invalid for some reason.  */
354   int index;			/* If it maps to a bitmap index.  */
355   unsigned int hash_index;	/* Index when in a hash table.  */
356   rtx reaching_reg;		/* Register to use when re-writing.  */
357 };
358 
359 /* Head of the list of load/store memory refs.  */
360 static struct ls_expr * pre_ldst_mems = NULL;
361 
362 /* Hashtable for the load/store memory refs.  */
363 static htab_t pre_ldst_table = NULL;
364 
365 /* Bitmap containing one bit for each register in the program.
366    Used when performing GCSE to track which registers have been set since
367    the start of the basic block.  */
368 static regset reg_set_bitmap;
369 
370 /* Array, indexed by basic block number for a list of insns which modify
371    memory within that block.  */
372 static vec<rtx> *modify_mem_list;
373 static bitmap modify_mem_list_set;
374 
375 typedef struct modify_pair_s
376 {
377   rtx dest;			/* A MEM.  */
378   rtx dest_addr;		/* The canonical address of `dest'.  */
379 } modify_pair;
380 
381 
382 /* This array parallels modify_mem_list, except that it stores MEMs
383    being set and their canonicalized memory addresses.  */
384 static vec<modify_pair> *canon_modify_mem_list;
385 
386 /* Bitmap indexed by block numbers to record which blocks contain
387    function calls.  */
388 static bitmap blocks_with_calls;
389 
390 /* Various variables for statistics gathering.  */
391 
392 /* Memory used in a pass.
393    This isn't intended to be absolutely precise.  Its intent is only
394    to keep an eye on memory usage.  */
395 static int bytes_used;
396 
397 /* GCSE substitutions made.  */
398 static int gcse_subst_count;
399 /* Number of copy instructions created.  */
400 static int gcse_create_count;
401 
402 /* Doing code hoisting.  */
403 static bool doing_code_hoisting_p = false;
404 
405 /* For available exprs: expressions killed in each basic block.  */
406 static sbitmap *ae_kill;
407 
408 /* Data stored for each basic block.  */
409 struct bb_data
410 {
411   /* Maximal register pressure inside basic block for given register class
412      (defined only for the pressure classes).  */
413   int max_reg_pressure[N_REG_CLASSES];
414   /* Recorded register pressure of basic block before trying to hoist
415      an expression.  Will be used to restore the register pressure
416      if the expression should not be hoisted.  */
417   int old_pressure;
418   /* Recorded register live_in info of basic block during code hoisting
419      process.  BACKUP is used to record live_in info before trying to
420      hoist an expression, and will be used to restore LIVE_IN if the
421      expression should not be hoisted.  */
422   bitmap live_in, backup;
423 };
424 
425 #define BB_DATA(bb) ((struct bb_data *) (bb)->aux)
426 
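/* Basic block currently being processed, presumably by the register-pressure
   aware hoisting code that also maintains curr_reg_pressure below.  */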
427 static basic_block curr_bb;
428 
429 /* Current register pressure for each pressure class.  */
430 static int curr_reg_pressure[N_REG_CLASSES];
431 
432 
433 static void compute_can_copy (void);
434 static void *gmalloc (size_t) ATTRIBUTE_MALLOC;
435 static void *gcalloc (size_t, size_t) ATTRIBUTE_MALLOC;
436 static void *gcse_alloc (unsigned long);
437 static void alloc_gcse_mem (void);
438 static void free_gcse_mem (void);
439 static void hash_scan_insn (rtx, struct hash_table_d *);
440 static void hash_scan_set (rtx, rtx, struct hash_table_d *);
441 static void hash_scan_clobber (rtx, rtx, struct hash_table_d *);
442 static void hash_scan_call (rtx, rtx, struct hash_table_d *);
443 static int want_to_gcse_p (rtx, int *);
444 static int oprs_unchanged_p (const_rtx, const_rtx, int);
445 static int oprs_anticipatable_p (const_rtx, const_rtx);
446 static int oprs_available_p (const_rtx, const_rtx);
447 static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int, int,
448 				  struct hash_table_d *);
449 static unsigned int hash_expr (const_rtx, enum machine_mode, int *, int);
450 static int expr_equiv_p (const_rtx, const_rtx);
451 static void record_last_reg_set_info (rtx, int);
452 static void record_last_mem_set_info (rtx);
453 static void record_last_set_info (rtx, const_rtx, void *);
454 static void compute_hash_table (struct hash_table_d *);
455 static void alloc_hash_table (struct hash_table_d *);
456 static void free_hash_table (struct hash_table_d *);
457 static void compute_hash_table_work (struct hash_table_d *);
458 static void dump_hash_table (FILE *, const char *, struct hash_table_d *);
459 static void compute_transp (const_rtx, int, sbitmap *);
460 static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *,
461 				      struct hash_table_d *);
462 static void mems_conflict_for_gcse_p (rtx, const_rtx, void *);
463 static int load_killed_in_block_p (const_basic_block, int, const_rtx, int);
464 static void canon_list_insert (rtx, const_rtx, void *);
465 static void alloc_pre_mem (int, int);
466 static void free_pre_mem (void);
467 static struct edge_list *compute_pre_data (void);
468 static int pre_expr_reaches_here_p (basic_block, struct expr *,
469 				    basic_block);
470 static void insert_insn_end_basic_block (struct expr *, basic_block);
471 static void pre_insert_copy_insn (struct expr *, rtx);
472 static void pre_insert_copies (void);
473 static int pre_delete (void);
474 static int pre_gcse (struct edge_list *);
475 static int one_pre_gcse_pass (void);
476 static void add_label_notes (rtx, rtx);
477 static void alloc_code_hoist_mem (int, int);
478 static void free_code_hoist_mem (void);
479 static void compute_code_hoist_vbeinout (void);
480 static void compute_code_hoist_data (void);
481 static int should_hoist_expr_to_dom (basic_block, struct expr *, basic_block,
482 				     sbitmap, int, int *, enum reg_class,
483 				     int *, bitmap, rtx);
484 static int hoist_code (void);
485 static enum reg_class get_regno_pressure_class (int regno, int *nregs);
486 static enum reg_class get_pressure_class_and_nregs (rtx insn, int *nregs);
487 static int one_code_hoisting_pass (void);
488 static rtx process_insert_insn (struct expr *);
489 static int pre_edge_insert (struct edge_list *, struct expr **);
490 static int pre_expr_reaches_here_p_work (basic_block, struct expr *,
491 					 basic_block, char *);
492 static struct ls_expr * ldst_entry (rtx);
493 static void free_ldst_entry (struct ls_expr *);
494 static void free_ld_motion_mems (void);
495 static void print_ldst_list (FILE *);
496 static struct ls_expr * find_rtx_in_ldst (rtx);
497 static int simple_mem (const_rtx);
498 static void invalidate_any_buried_refs (rtx);
499 static void compute_ld_motion_mems (void);
500 static void trim_ld_motion_mems (void);
501 static void update_ld_motion_stores (struct expr *);
502 static void clear_modify_mem_tables (void);
503 static void free_modify_mem_tables (void);
504 static rtx gcse_emit_move_after (rtx, rtx, rtx);
505 static bool is_too_expensive (const char *);
506 
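/* Allocation wrappers: the GNEW and GCNEW families go through gmalloc and
   gcalloc (covers for xmalloc/xcalloc that track bytes_used), while the
   GOBNEW variants allocate from the GCSE obstack via gcse_alloc.  */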
507 #define GNEW(T)			((T *) gmalloc (sizeof (T)))
508 #define GCNEW(T)		((T *) gcalloc (1, sizeof (T)))
509 
510 #define GNEWVEC(T, N)		((T *) gmalloc (sizeof (T) * (N)))
511 #define GCNEWVEC(T, N)		((T *) gcalloc ((N), sizeof (T)))
512 
513 #define GNEWVAR(T, S)		((T *) gmalloc ((S)))
514 #define GCNEWVAR(T, S)		((T *) gcalloc (1, (S)))
515 
516 #define GOBNEW(T)		((T *) gcse_alloc (sizeof (T)))
517 #define GOBNEWVAR(T, S)		((T *) gcse_alloc ((S)))
518 
519 /* Misc. utilities.  */
520 
521 #define can_copy \
522   (this_target_gcse->x_can_copy)
523 #define can_copy_init_p \
524   (this_target_gcse->x_can_copy_init_p)
525 
526 /* Compute which modes support reg/reg copy operations.  */
527 
528 static void
529 compute_can_copy (void)
530 {
531   int i;
532 #ifndef AVOID_CCMODE_COPIES
533   rtx reg, insn;
534 #endif
535   memset (can_copy, 0, NUM_MACHINE_MODES);
536 
537   start_sequence ();
538   for (i = 0; i < NUM_MACHINE_MODES; i++)
539     if (GET_MODE_CLASS (i) == MODE_CC)
540       {
541 #ifdef AVOID_CCMODE_COPIES
542 	can_copy[i] = 0;
543 #else
544 	reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
545 	insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
546 	if (recog (PATTERN (insn), insn, NULL) >= 0)
547 	  can_copy[i] = 1;
548 #endif
549       }
550     else
551       can_copy[i] = 1;
552 
553   end_sequence ();
554 }
555 
556 /* Returns whether the mode supports reg/reg copy operations.  */
557 
558 bool
559 can_copy_p (enum machine_mode mode)
560 {
561   if (! can_copy_init_p)
562     {
563       compute_can_copy ();
564       can_copy_init_p = true;
565     }
566 
567   return can_copy[mode] != 0;
568 }
569 
570 /* Cover function to xmalloc to record bytes allocated.  */
571 
572 static void *
573 gmalloc (size_t size)
574 {
575   bytes_used += size;
576   return xmalloc (size);
577 }
578 
579 /* Cover function to xcalloc to record bytes allocated.  */
580 
581 static void *
582 gcalloc (size_t nelem, size_t elsize)
583 {
584   bytes_used += nelem * elsize;
585   return xcalloc (nelem, elsize);
586 }
587 
588 /* Cover function to obstack_alloc.  */
589 
590 static void *
591 gcse_alloc (unsigned long size)
592 {
593   bytes_used += size;
594   return obstack_alloc (&gcse_obstack, size);
595 }
596 
597 /* Allocate memory for the reg/memory set tracking tables.
598    This is called at the start of each pass.  */
599 
600 static void
601 alloc_gcse_mem (void)
602 {
603   /* Allocate vars to track sets of regs.  */
604   reg_set_bitmap = ALLOC_REG_SET (NULL);
605 
606   /* Allocate array to keep a list of insns which modify memory in each
607      basic block.  The two typedefs are needed to work around the
608      pre-processor limitation with template types in macro arguments.  */
609   typedef vec<rtx> vec_rtx_heap;
610   typedef vec<modify_pair> vec_modify_pair_heap;
611   modify_mem_list = GCNEWVEC (vec_rtx_heap, last_basic_block);
612   canon_modify_mem_list = GCNEWVEC (vec_modify_pair_heap, last_basic_block);
613   modify_mem_list_set = BITMAP_ALLOC (NULL);
614   blocks_with_calls = BITMAP_ALLOC (NULL);
615 }
616 
617 /* Free memory allocated by alloc_gcse_mem.  */
618 
619 static void
620 free_gcse_mem (void)
621 {
622   FREE_REG_SET (reg_set_bitmap);
623 
624   free_modify_mem_tables ();
625   BITMAP_FREE (modify_mem_list_set);
626   BITMAP_FREE (blocks_with_calls);
627 }
628 
629 /* Compute the local properties of each recorded expression.
630 
631    Local properties are those that are defined by the block, irrespective of
632    other blocks.
633 
634    An expression is transparent in a block if its operands are not modified
635    in the block.
636 
637    An expression is computed (locally available) in a block if it is computed
638    at least once and the expression would contain the same value if the
639    computation were moved to the end of the block.
640 
641    An expression is locally anticipatable in a block if it is computed at
642    least once and the expression would contain the same value if the
643    computation were moved to the beginning of the block.
644 
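   For example (a hypothetical block): if a block contains only
     a = a + 1;  x = a + b;
   then (plus a b) is computed (available) there, since A and B do not change
   after the occurrence, but it is neither locally anticipatable (A changes
   before the occurrence) nor transparent (A is modified within the block).
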
645    We call this routine for PRE and code hoisting.  Both compute basically
646    the same information and thus can easily share this code.
647 
648    TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
649    properties.  If NULL, then it is not necessary to compute or record that
650    particular property.
651 
652    TABLE controls which hash table to look at.  */
653 
654 static void
655 compute_local_properties (sbitmap *transp, sbitmap *comp, sbitmap *antloc,
656 			  struct hash_table_d *table)
657 {
658   unsigned int i;
659 
660   /* Initialize any bitmaps that were passed in.  */
661   if (transp)
662     {
663       bitmap_vector_ones (transp, last_basic_block);
664     }
665 
666   if (comp)
667     bitmap_vector_clear (comp, last_basic_block);
668   if (antloc)
669     bitmap_vector_clear (antloc, last_basic_block);
670 
671   for (i = 0; i < table->size; i++)
672     {
673       struct expr *expr;
674 
675       for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
676 	{
677 	  int indx = expr->bitmap_index;
678 	  struct occr *occr;
679 
680 	  /* The expression is transparent in this block if it is not killed.
681 	     We start by assuming all are transparent [none are killed], and
682 	     then reset the bits for those that are.  */
683 	  if (transp)
684 	    compute_transp (expr->expr, indx, transp);
685 
686 	  /* The occurrences recorded in antic_occr are exactly those that
687 	     we want to set to nonzero in ANTLOC.  */
688 	  if (antloc)
689 	    for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
690 	      {
691 		bitmap_set_bit (antloc[BLOCK_FOR_INSN (occr->insn)->index], indx);
692 
693 		/* While we're scanning the table, this is a good place to
694 		   initialize this.  */
695 		occr->deleted_p = 0;
696 	      }
697 
698 	  /* The occurrences recorded in avail_occr are exactly those that
699 	     we want to set to nonzero in COMP.  */
700 	  if (comp)
701 	    for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
702 	      {
703 		bitmap_set_bit (comp[BLOCK_FOR_INSN (occr->insn)->index], indx);
704 
705 		/* While we're scanning the table, this is a good place to
706 		   initialize this.  */
707 		occr->copied_p = 0;
708 	      }
709 
710 	  /* While we're scanning the table, this is a good place to
711 	     initialize this.  */
712 	  expr->reaching_reg = 0;
713 	}
714     }
715 }
716 
717 /* Hash table support.  */
718 
719 struct reg_avail_info
720 {
721   basic_block last_bb;
722   int first_set;
723   int last_set;
724 };
725 
726 static struct reg_avail_info *reg_avail_info;
727 static basic_block current_bb;
728 
729 /* See whether X, the source of a set, is something we want to consider for
730    GCSE.  */
731 
732 static int
733 want_to_gcse_p (rtx x, int *max_distance_ptr)
734 {
735 #ifdef STACK_REGS
736   /* On register stack architectures, don't GCSE constants from the
737      constant pool, as the benefits are often swamped by the overhead
738      of shuffling the register stack between basic blocks.  */
739   if (IS_STACK_MODE (GET_MODE (x)))
740     x = avoid_constant_pool_reference (x);
741 #endif
742 
743   /* GCSE'ing constants:
744 
745      We do not specifically distinguish between constant and non-constant
746      expressions in PRE and Hoist.  We use set_src_cost below to limit
747      the maximum distance simple expressions can travel.
748 
749      Nevertheless, constants are much easier to GCSE, and, hence,
750      it is easy to overdo the optimizations.  Usually, excessive PRE and
751      hoisting of constants leads to increased register pressure.
752 
753      RA can deal with this by rematerializing some of the constants.
754      Therefore, it is important that the back-end generates sets of constants
755      in a way that allows reload to rematerialize them under high register
756      pressure, i.e., a pseudo register with a REG_EQUAL note for a constant
757      is set only once.  Failing to do so will result in IRA/reload
758      spilling such constants under high register pressure instead of
759      rematerializing them.  */
760 
761   switch (GET_CODE (x))
762     {
763     case REG:
764     case SUBREG:
765     case CALL:
766       return 0;
767 
768     CASE_CONST_ANY:
769       if (!doing_code_hoisting_p)
770 	/* Do not PRE constants.  */
771 	return 0;
772 
773       /* FALLTHRU */
774 
775     default:
776       if (doing_code_hoisting_p)
777 	/* PRE doesn't implement max_distance restriction.  */
778 	{
779 	  int cost;
780 	  int max_distance;
781 
782 	  gcc_assert (!optimize_function_for_speed_p (cfun)
783 		      && optimize_function_for_size_p (cfun));
784 	  cost = set_src_cost (x, 0);
785 
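	  /* Cheap expressions get a max_distance proportional to their cost,
	     so simple, easily rematerialized values are not hoisted far;
	     expressions costing at least GCSE_UNRESTRICTED_COST insns get
	     max_distance == 0, i.e. no distance restriction.  */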
786 	  if (cost < COSTS_N_INSNS (GCSE_UNRESTRICTED_COST))
787 	    {
788 	      max_distance = (GCSE_COST_DISTANCE_RATIO * cost) / 10;
789 	      if (max_distance == 0)
790 		return 0;
791 
792 	      gcc_assert (max_distance > 0);
793 	    }
794 	  else
795 	    max_distance = 0;
796 
797 	  if (max_distance_ptr)
798 	    *max_distance_ptr = max_distance;
799 	}
800 
801       return can_assign_to_reg_without_clobbers_p (x);
802     }
803 }
804 
805 /* Used internally by can_assign_to_reg_without_clobbers_p.  */
806 
807 static GTY(()) rtx test_insn;
808 
809 /* Return true if we can assign X to a pseudo register such that the
810    resulting insn does not result in clobbering a hard register as a
811    side-effect.
812 
813    Additionally, if the target requires it, check that the resulting insn
814    can be copied.  If it cannot, this means that X is special and probably
815    has hidden side-effects we don't want to mess with.
816 
817    This function is typically used by code motion passes, to verify
818    that it is safe to insert an insn without worrying about clobbering
819    maybe live hard regs.  */
820 
821 bool
822 can_assign_to_reg_without_clobbers_p (rtx x)
823 {
824   int num_clobbers = 0;
825   int icode;
826 
827   /* If this is a valid operand, we are OK.  If it's VOIDmode, we aren't.  */
828   if (general_operand (x, GET_MODE (x)))
829     return 1;
830   else if (GET_MODE (x) == VOIDmode)
831     return 0;
832 
833   /* Otherwise, check if we can make a valid insn from it.  First initialize
834      our test insn if we haven't already.  */
835   if (test_insn == 0)
836     {
837       test_insn
838 	= make_insn_raw (gen_rtx_SET (VOIDmode,
839 				      gen_rtx_REG (word_mode,
840 						   FIRST_PSEUDO_REGISTER * 2),
841 				      const0_rtx));
842       NEXT_INSN (test_insn) = PREV_INSN (test_insn) = 0;
843     }
844 
845   /* Now make an insn like the one we would make when GCSE'ing and see if
846      valid.  */
847   PUT_MODE (SET_DEST (PATTERN (test_insn)), GET_MODE (x));
848   SET_SRC (PATTERN (test_insn)) = x;
849 
850   icode = recog (PATTERN (test_insn), test_insn, &num_clobbers);
851   if (icode < 0)
852     return false;
853 
854   if (num_clobbers > 0 && added_clobbers_hard_reg_p (icode))
855     return false;
856 
857   if (targetm.cannot_copy_insn_p && targetm.cannot_copy_insn_p (test_insn))
858     return false;
859 
860   return true;
861 }
862 
863 /* Return nonzero if the operands of expression X are unchanged from the
864    start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
865    or from INSN to the end of INSN's basic block (if AVAIL_P != 0).  */
866 
867 static int
868 oprs_unchanged_p (const_rtx x, const_rtx insn, int avail_p)
869 {
870   int i, j;
871   enum rtx_code code;
872   const char *fmt;
873 
874   if (x == 0)
875     return 1;
876 
877   code = GET_CODE (x);
878   switch (code)
879     {
880     case REG:
881       {
882 	struct reg_avail_info *info = &reg_avail_info[REGNO (x)];
883 
884 	if (info->last_bb != current_bb)
885 	  return 1;
886 	if (avail_p)
887 	  return info->last_set < DF_INSN_LUID (insn);
888 	else
889 	  return info->first_set >= DF_INSN_LUID (insn);
890       }
891 
892     case MEM:
893       if (load_killed_in_block_p (current_bb, DF_INSN_LUID (insn),
894 				  x, avail_p))
895 	return 0;
896       else
897 	return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);
898 
899     case PRE_DEC:
900     case PRE_INC:
901     case POST_DEC:
902     case POST_INC:
903     case PRE_MODIFY:
904     case POST_MODIFY:
905       return 0;
906 
907     case PC:
908     case CC0: /*FIXME*/
909     case CONST:
910     CASE_CONST_ANY:
911     case SYMBOL_REF:
912     case LABEL_REF:
913     case ADDR_VEC:
914     case ADDR_DIFF_VEC:
915       return 1;
916 
917     default:
918       break;
919     }
920 
921   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
922     {
923       if (fmt[i] == 'e')
924 	{
925 	  /* If we are about to do the last recursive call needed at this
926 	     level, change it into iteration.  This function is called enough
927 	     to be worth it.  */
928 	  if (i == 0)
929 	    return oprs_unchanged_p (XEXP (x, i), insn, avail_p);
930 
931 	  else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
932 	    return 0;
933 	}
934       else if (fmt[i] == 'E')
935 	for (j = 0; j < XVECLEN (x, i); j++)
936 	  if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
937 	    return 0;
938     }
939 
940   return 1;
941 }
942 
943 /* Info passed from load_killed_in_block_p to mems_conflict_for_gcse_p.  */
944 
945 struct mem_conflict_info
946 {
947   /* A memory reference for a load instruction, mems_conflict_for_gcse_p will
948      see if a memory store conflicts with this memory load.  */
949   const_rtx mem;
950 
951   /* True if mems_conflict_for_gcse_p finds a conflict between two memory
952      references.  */
953   bool conflict;
954 };
955 
956 /* DEST is the output of an instruction.  If it is a memory reference and
957    possibly conflicts with the load found in DATA, then communicate this
958    information back through DATA.  */
959 
960 static void
961 mems_conflict_for_gcse_p (rtx dest, const_rtx setter ATTRIBUTE_UNUSED,
962 			  void *data)
963 {
964   struct mem_conflict_info *mci = (struct mem_conflict_info *) data;
965 
966   while (GET_CODE (dest) == SUBREG
967 	 || GET_CODE (dest) == ZERO_EXTRACT
968 	 || GET_CODE (dest) == STRICT_LOW_PART)
969     dest = XEXP (dest, 0);
970 
971   /* If DEST is not a MEM, then it will not conflict with the load.  Note
972      that function calls are assumed to clobber memory, but are handled
973      elsewhere.  */
974   if (! MEM_P (dest))
975     return;
976 
977   /* If we are setting a MEM in our list of specially recognized MEMs,
978      don't mark as killed this time.  */
979   if (pre_ldst_mems != NULL && expr_equiv_p (dest, mci->mem))
980     {
981       if (!find_rtx_in_ldst (dest))
982 	mci->conflict = true;
983       return;
984     }
985 
986   if (true_dependence (dest, GET_MODE (dest), mci->mem))
987     mci->conflict = true;
988 }
989 
990 /* Return nonzero if the expression in X (a memory reference) is killed
991    in block BB before or after the insn with the LUID in UID_LIMIT.
992    AVAIL_P is nonzero for kills after UID_LIMIT, and zero for kills
993    before UID_LIMIT.
994 
995    To check the entire block, set UID_LIMIT to max_uid + 1 and
996    AVAIL_P to 0.  */
997 
998 static int
999 load_killed_in_block_p (const_basic_block bb, int uid_limit, const_rtx x,
1000 			int avail_p)
1001 {
1002   vec<rtx> list = modify_mem_list[bb->index];
1003   rtx setter;
1004   unsigned ix;
1005 
1006   /* If this is a readonly MEM, then we aren't going to be changing it.  */
1007   if (MEM_READONLY_P (x))
1008     return 0;
1009 
1010   FOR_EACH_VEC_ELT_REVERSE (list, ix, setter)
1011     {
1012       struct mem_conflict_info mci;
1013 
1014       /* Ignore entries in the list that do not apply.  */
1015       if ((avail_p
1016 	   && DF_INSN_LUID (setter) < uid_limit)
1017 	  || (! avail_p
1018 	      && DF_INSN_LUID (setter) > uid_limit))
1019 	continue;
1020 
1021       /* If SETTER is a call everything is clobbered.  Note that calls
1022 	 to pure functions are never put on the list, so we need not
1023 	 worry about them.  */
1024       if (CALL_P (setter))
1025 	return 1;
1026 
1027       /* SETTER must be an INSN of some kind that sets memory.  Call
1028 	 note_stores to examine each hunk of memory that is modified.  */
1029       mci.mem = x;
1030       mci.conflict = false;
1031       note_stores (PATTERN (setter), mems_conflict_for_gcse_p, &mci);
1032       if (mci.conflict)
1033 	return 1;
1034     }
1035   return 0;
1036 }
1037 
1038 /* Return nonzero if the operands of expression X are unchanged from
1039    the start of INSN's basic block up to but not including INSN.  */
1040 
1041 static int
1042 oprs_anticipatable_p (const_rtx x, const_rtx insn)
1043 {
1044   return oprs_unchanged_p (x, insn, 0);
1045 }
1046 
1047 /* Return nonzero if the operands of expression X are unchanged from
1048    INSN to the end of INSN's basic block.  */
1049 
1050 static int
1051 oprs_available_p (const_rtx x, const_rtx insn)
1052 {
1053   return oprs_unchanged_p (x, insn, 1);
1054 }
1055 
1056 /* Hash expression X.
1057 
1058    MODE is only used if X is a CONST_INT.  DO_NOT_RECORD_P is a boolean
1059    indicating if a volatile operand is found or if the expression contains
1060    something we don't want to insert in the table.  HASH_TABLE_SIZE is
1061    the current size of the hash table to be probed.  */
1062 
1063 static unsigned int
1064 hash_expr (const_rtx x, enum machine_mode mode, int *do_not_record_p,
1065 	   int hash_table_size)
1066 {
1067   unsigned int hash;
1068 
1069   *do_not_record_p = 0;
1070 
1071   hash = hash_rtx (x, mode, do_not_record_p, NULL, /*have_reg_qty=*/false);
1072   return hash % hash_table_size;
1073 }
1074 
1075 /* Return nonzero if exp1 is equivalent to exp2.  */
1076 
1077 static int
1078 expr_equiv_p (const_rtx x, const_rtx y)
1079 {
1080   return exp_equiv_p (x, y, 0, true);
1081 }
1082 
1083 /* Insert expression X in INSN in the hash TABLE.
1084    If it is already present, record it as the last occurrence in INSN's
1085    basic block.
1086 
1087    MODE is the mode of the value X is being stored into.
1088    It is only used if X is a CONST_INT.
1089 
1090    ANTIC_P is nonzero if X is an anticipatable expression.
1091    AVAIL_P is nonzero if X is an available expression.
1092 
1093    MAX_DISTANCE is the maximum distance in instructions this expression can
1094    be moved.  */
1095 
1096 static void
1097 insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p,
1098 		      int avail_p, int max_distance, struct hash_table_d *table)
1099 {
1100   int found, do_not_record_p;
1101   unsigned int hash;
1102   struct expr *cur_expr, *last_expr = NULL;
1103   struct occr *antic_occr, *avail_occr;
1104 
1105   hash = hash_expr (x, mode, &do_not_record_p, table->size);
1106 
1107   /* Do not insert expression in table if it contains volatile operands,
1108      or if hash_expr determines the expression is something we don't want
1109      to or can't handle.  */
1110   if (do_not_record_p)
1111     return;
1112 
1113   cur_expr = table->table[hash];
1114   found = 0;
1115 
1116   while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
1117     {
1118       /* If the expression isn't found, save a pointer to the end of
1119 	 the list.  */
1120       last_expr = cur_expr;
1121       cur_expr = cur_expr->next_same_hash;
1122     }
1123 
1124   if (! found)
1125     {
1126       cur_expr = GOBNEW (struct expr);
1127       bytes_used += sizeof (struct expr);
1128       if (table->table[hash] == NULL)
1129 	/* This is the first pattern that hashed to this index.  */
1130 	table->table[hash] = cur_expr;
1131       else
1132 	/* Add EXPR to end of this hash chain.  */
1133 	last_expr->next_same_hash = cur_expr;
1134 
1135       /* Set the fields of the expr element.  */
1136       cur_expr->expr = x;
1137       cur_expr->bitmap_index = table->n_elems++;
1138       cur_expr->next_same_hash = NULL;
1139       cur_expr->antic_occr = NULL;
1140       cur_expr->avail_occr = NULL;
1141       gcc_assert (max_distance >= 0);
1142       cur_expr->max_distance = max_distance;
1143     }
1144   else
1145     gcc_assert (cur_expr->max_distance == max_distance);
1146 
1147   /* Now record the occurrence(s).  */
1148   if (antic_p)
1149     {
1150       antic_occr = cur_expr->antic_occr;
1151 
1152       if (antic_occr
1153 	  && BLOCK_FOR_INSN (antic_occr->insn) != BLOCK_FOR_INSN (insn))
1154 	antic_occr = NULL;
1155 
1156       if (antic_occr)
1157 	/* Found another instance of the expression in the same basic block.
1158 	   Prefer the currently recorded one.  We want the first one in the
1159 	   block and the block is scanned from start to end.  */
1160 	; /* nothing to do */
1161       else
1162 	{
1163 	  /* First occurrence of this expression in this basic block.  */
1164 	  antic_occr = GOBNEW (struct occr);
1165 	  bytes_used += sizeof (struct occr);
1166 	  antic_occr->insn = insn;
1167 	  antic_occr->next = cur_expr->antic_occr;
1168 	  antic_occr->deleted_p = 0;
1169 	  cur_expr->antic_occr = antic_occr;
1170 	}
1171     }
1172 
1173   if (avail_p)
1174     {
1175       avail_occr = cur_expr->avail_occr;
1176 
1177       if (avail_occr
1178 	  && BLOCK_FOR_INSN (avail_occr->insn) == BLOCK_FOR_INSN (insn))
1179 	{
1180 	  /* Found another instance of the expression in the same basic block.
1181 	     Prefer this occurrence to the currently recorded one.  We want
1182 	     the last one in the block and the block is scanned from start
1183 	     to end.  */
1184 	  avail_occr->insn = insn;
1185 	}
1186       else
1187 	{
1188 	  /* First occurrence of this expression in this basic block.  */
1189 	  avail_occr = GOBNEW (struct occr);
1190 	  bytes_used += sizeof (struct occr);
1191 	  avail_occr->insn = insn;
1192 	  avail_occr->next = cur_expr->avail_occr;
1193 	  avail_occr->deleted_p = 0;
1194 	  cur_expr->avail_occr = avail_occr;
1195 	}
1196     }
1197 }
1198 
1199 /* Scan SET present in INSN and add an entry to the hash TABLE.  */
1200 
1201 static void
1202 hash_scan_set (rtx set, rtx insn, struct hash_table_d *table)
1203 {
1204   rtx src = SET_SRC (set);
1205   rtx dest = SET_DEST (set);
1206   rtx note;
1207 
1208   if (GET_CODE (src) == CALL)
1209     hash_scan_call (src, insn, table);
1210 
1211   else if (REG_P (dest))
1212     {
1213       unsigned int regno = REGNO (dest);
1214       int max_distance = 0;
1215 
1216       /* See if a REG_EQUAL note shows this equivalent to a simpler expression.
1217 
1218 	 This allows us to do a single GCSE pass and still eliminate
1219 	 redundant constants, addresses or other expressions that are
1220 	 constructed with multiple instructions.
1221 
1222 	 However, keep the original SRC if INSN is a simple reg-reg move.
1223 	 In this case, there will almost always be a REG_EQUAL note on the
1224 	 insn that sets SRC.  By recording the REG_EQUAL value here as SRC
1225 	 for INSN, we miss copy propagation opportunities and we perform the
1226 	 same PRE GCSE operation repeatedly on the same REG_EQUAL value if we
1227 	 do more than one PRE GCSE pass.
1228 
1229 	 Note that this does not impede profitable constant propagations.  We
1230 	 "look through" reg-reg sets in lookup_avail_set.  */
1231       note = find_reg_equal_equiv_note (insn);
1232       if (note != 0
1233 	  && REG_NOTE_KIND (note) == REG_EQUAL
1234 	  && !REG_P (src)
1235 	  && want_to_gcse_p (XEXP (note, 0), NULL))
1236 	src = XEXP (note, 0), set = gen_rtx_SET (VOIDmode, dest, src);
1237 
1238       /* Only record sets of pseudo-regs in the hash table.  */
1239       if (regno >= FIRST_PSEUDO_REGISTER
1240 	  /* Don't GCSE something if we can't do a reg/reg copy.  */
1241 	  && can_copy_p (GET_MODE (dest))
1242 	  /* GCSE commonly inserts instructions after the insn.  We can't
1243 	     do that easily for EH edges so disable GCSE on these for now.  */
1244 	  /* ??? We can now easily create new EH landing pads at the
1245 	     gimple level, for splitting edges; there's no reason we
1246 	     can't do the same thing at the rtl level.  */
1247 	  && !can_throw_internal (insn)
1248 	  /* Is SET_SRC something we want to gcse?  */
1249 	  && want_to_gcse_p (src, &max_distance)
1250 	  /* Don't CSE a nop.  */
1251 	  && ! set_noop_p (set)
1252 	  /* Don't GCSE if it has attached REG_EQUIV note.
1253 	     At this point only function parameters should have
1254 	     REG_EQUIV notes and if the argument slot is used somewhere
1255 	     explicitly, it means address of parameter has been taken,
1256 	     so we should not extend the lifetime of the pseudo.  */
1257 	  && (note == NULL_RTX || ! MEM_P (XEXP (note, 0))))
1258 	{
1259 	  /* An expression is not anticipatable if its operands are
1260 	     modified before this insn or if this is not the only SET in
1261 	     this insn.  The latter condition does not have to mean that
1262 	     SRC itself is not anticipatable, but we just will not be
1263 	     able to handle code motion of insns with multiple sets.  */
1264 	  int antic_p = oprs_anticipatable_p (src, insn)
1265 			&& !multiple_sets (insn);
1266 	  /* An expression is not available if its operands are
1267 	     subsequently modified, including this insn.  It's also not
1268 	     available if this is a branch, because we can't insert
1269 	     a set after the branch.  */
1270 	  int avail_p = (oprs_available_p (src, insn)
1271 			 && ! JUMP_P (insn));
1272 
1273 	  insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p,
1274 				max_distance, table);
1275 	}
1276     }
1277   /* In the case of a store we want to consider the memory value as available
1278      in the REG stored in that memory.  This makes it possible to remove
1279      redundant loads due to stores to the same location.  */
1280   else if (flag_gcse_las && REG_P (src) && MEM_P (dest))
1281       {
1282         unsigned int regno = REGNO (src);
1283 	int max_distance = 0;
1284 
1285 	/* Only record sets of pseudo-regs in the hash table.  */
1286         if (regno >= FIRST_PSEUDO_REGISTER
1287 	   /* Don't GCSE something if we can't do a reg/reg copy.  */
1288 	   && can_copy_p (GET_MODE (src))
1289 	   /* GCSE commonly inserts instructions after the insn.  We can't
1290 	      do that easily for EH edges so disable GCSE on these for now.  */
1291 	   && !can_throw_internal (insn)
1292 	   /* Is SET_DEST something we want to gcse?  */
1293 	   && want_to_gcse_p (dest, &max_distance)
1294 	   /* Don't CSE a nop.  */
1295 	   && ! set_noop_p (set)
1296 	   /* Don't GCSE if it has attached REG_EQUIV note.
1297 	      At this point only function parameters should have
1298 	      REG_EQUIV notes and if the argument slot is used somewhere
1299 	      explicitly, it means address of parameter has been taken,
1300 	      so we should not extend the lifetime of the pseudo.  */
1301 	   && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0
1302 	       || ! MEM_P (XEXP (note, 0))))
1303              {
1304                /* Stores are never anticipatable.  */
1305                int antic_p = 0;
1306 	       /* An expression is not available if its operands are
1307 	          subsequently modified, including this insn.  It's also not
1308 	          available if this is a branch, because we can't insert
1309 	          a set after the branch.  */
1310                int avail_p = oprs_available_p (dest, insn)
1311 			     && ! JUMP_P (insn);
1312 
1313 	       /* Record the memory expression (DEST) in the hash table.  */
1314 	       insert_expr_in_table (dest, GET_MODE (dest), insn,
1315 				     antic_p, avail_p, max_distance, table);
1316              }
1317       }
1318 }
1319 
1320 static void
1321 hash_scan_clobber (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
1322 		   struct hash_table_d *table ATTRIBUTE_UNUSED)
1323 {
1324   /* Currently nothing to do.  */
1325 }
1326 
1327 static void
1328 hash_scan_call (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
1329 		struct hash_table_d *table ATTRIBUTE_UNUSED)
1330 {
1331   /* Currently nothing to do.  */
1332 }
1333 
1334 /* Process INSN and add hash table entries as appropriate.  */
1335 
1336 static void
1337 hash_scan_insn (rtx insn, struct hash_table_d *table)
1338 {
1339   rtx pat = PATTERN (insn);
1340   int i;
1341 
1342   /* Pick out the sets of INSN and for other forms of instructions record
1343      what's been modified.  */
1344 
1345   if (GET_CODE (pat) == SET)
1346     hash_scan_set (pat, insn, table);
1347 
1348   else if (GET_CODE (pat) == CLOBBER)
1349     hash_scan_clobber (pat, insn, table);
1350 
1351   else if (GET_CODE (pat) == CALL)
1352     hash_scan_call (pat, insn, table);
1353 
1354   else if (GET_CODE (pat) == PARALLEL)
1355     for (i = 0; i < XVECLEN (pat, 0); i++)
1356       {
1357 	rtx x = XVECEXP (pat, 0, i);
1358 
1359 	if (GET_CODE (x) == SET)
1360 	  hash_scan_set (x, insn, table);
1361 	else if (GET_CODE (x) == CLOBBER)
1362 	  hash_scan_clobber (x, insn, table);
1363 	else if (GET_CODE (x) == CALL)
1364 	  hash_scan_call (x, insn, table);
1365       }
1366 }
1367 
1368 /* Dump the hash table TABLE to file FILE under the name NAME.  */
1369 
1370 static void
1371 dump_hash_table (FILE *file, const char *name, struct hash_table_d *table)
1372 {
1373   int i;
1374   /* Flattened out table, so it's printed in proper order.  */
1375   struct expr **flat_table;
1376   unsigned int *hash_val;
1377   struct expr *expr;
1378 
1379   flat_table = XCNEWVEC (struct expr *, table->n_elems);
1380   hash_val = XNEWVEC (unsigned int, table->n_elems);
1381 
1382   for (i = 0; i < (int) table->size; i++)
1383     for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
1384       {
1385 	flat_table[expr->bitmap_index] = expr;
1386 	hash_val[expr->bitmap_index] = i;
1387       }
1388 
1389   fprintf (file, "%s hash table (%d buckets, %d entries)\n",
1390 	   name, table->size, table->n_elems);
1391 
1392   for (i = 0; i < (int) table->n_elems; i++)
1393     if (flat_table[i] != 0)
1394       {
1395 	expr = flat_table[i];
1396 	fprintf (file, "Index %d (hash value %d; max distance %d)\n  ",
1397 		 expr->bitmap_index, hash_val[i], expr->max_distance);
1398 	print_rtl (file, expr->expr);
1399 	fprintf (file, "\n");
1400       }
1401 
1402   fprintf (file, "\n");
1403 
1404   free (flat_table);
1405   free (hash_val);
1406 }
1407 
1408 /* Record register first/last/block set information for REGNO in INSN.
1409 
1410    first_set records the first place in the block where the register
1411    is set and is used to compute "anticipatability".
1412 
1413    last_set records the last place in the block where the register
1414    is set and is used to compute "availability".
1415 
1416    last_bb records the block for which first_set and last_set are
1417    valid, as a quick test to invalidate them.  */
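
/* For example (invented LUIDs): if REGNO is set at LUID 5 and again at
   LUID 12 within a block, then for an occurrence at LUID 8 in that block
   oprs_unchanged_p reports the operand as neither anticipatable
   (first_set 5 is below 8) nor available (last_set 12 is not below 8).  */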
1418 
1419 static void
1420 record_last_reg_set_info (rtx insn, int regno)
1421 {
1422   struct reg_avail_info *info = &reg_avail_info[regno];
1423   int luid = DF_INSN_LUID (insn);
1424 
1425   info->last_set = luid;
1426   if (info->last_bb != current_bb)
1427     {
1428       info->last_bb = current_bb;
1429       info->first_set = luid;
1430     }
1431 }
1432 
1433 /* Record all of the canonicalized MEMs of record_last_mem_set_info's insn.
1434    Note we store the MEM being set together with its canonicalized address
1435    as a modify_pair in canon_modify_mem_list.  */
1436 
1437 static void
1438 canon_list_insert (rtx dest ATTRIBUTE_UNUSED, const_rtx x ATTRIBUTE_UNUSED,
1439 		   void * v_insn)
1440 {
1441   rtx dest_addr, insn;
1442   int bb;
1443   modify_pair pair;
1444 
1445   while (GET_CODE (dest) == SUBREG
1446       || GET_CODE (dest) == ZERO_EXTRACT
1447       || GET_CODE (dest) == STRICT_LOW_PART)
1448     dest = XEXP (dest, 0);
1449 
1450   /* If DEST is not a MEM, then it will not conflict with a load.  Note
1451      that function calls are assumed to clobber memory, but are handled
1452      elsewhere.  */
1453 
1454   if (! MEM_P (dest))
1455     return;
1456 
1457   dest_addr = get_addr (XEXP (dest, 0));
1458   dest_addr = canon_rtx (dest_addr);
1459   insn = (rtx) v_insn;
1460   bb = BLOCK_FOR_INSN (insn)->index;
1461 
1462   pair.dest = dest;
1463   pair.dest_addr = dest_addr;
1464   canon_modify_mem_list[bb].safe_push (pair);
1465 }
1466 
1467 /* Record memory modification information for INSN.  We do not actually care
1468    about the memory location(s) that are set, or even how they are set (consider
1469    a CALL_INSN).  We merely need to record which insns modify memory.  */
1470 
1471 static void
1472 record_last_mem_set_info (rtx insn)
1473 {
1474   int bb = BLOCK_FOR_INSN (insn)->index;
1475 
1476   /* load_killed_in_block_p will handle the case of calls clobbering
1477      everything.  */
1478   modify_mem_list[bb].safe_push (insn);
1479   bitmap_set_bit (modify_mem_list_set, bb);
1480 
1481   if (CALL_P (insn))
1482     bitmap_set_bit (blocks_with_calls, bb);
1483   else
1484     note_stores (PATTERN (insn), canon_list_insert, (void*) insn);
1485 }
1486 
1487 /* Called from compute_hash_table via note_stores to handle one
1488    SET or CLOBBER in an insn.  DATA is really the instruction in which
1489    the SET is taking place.  */
1490 
1491 static void
1492 record_last_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data)
1493 {
1494   rtx last_set_insn = (rtx) data;
1495 
1496   if (GET_CODE (dest) == SUBREG)
1497     dest = SUBREG_REG (dest);
1498 
1499   if (REG_P (dest))
1500     record_last_reg_set_info (last_set_insn, REGNO (dest));
1501   else if (MEM_P (dest)
1502 	   /* Ignore pushes, they clobber nothing.  */
1503 	   && ! push_operand (dest, GET_MODE (dest)))
1504     record_last_mem_set_info (last_set_insn);
1505 }
1506 
1507 /* Top level function to create an expression hash table.
1508 
1509    Expression entries are placed in the hash table if
1510    - they are of the form (set (pseudo-reg) src),
1511    - src is something we want to perform GCSE on,
1512    - none of the operands are subsequently modified in the block
1513 
1514    SRC can be anything that want_to_gcse_p accepts.
1515 
1516    TABLE is the table computed.  */
1517 
1518 static void
1519 compute_hash_table_work (struct hash_table_d *table)
1520 {
1521   int i;
1522 
1523   /* Clear any memory modification information recorded by a previous pass.  */
1524   clear_modify_mem_tables ();
1525   /* Some working arrays used to track first and last set in each block.  */
1526   reg_avail_info = GNEWVEC (struct reg_avail_info, max_reg_num ());
1527 
1528   for (i = 0; i < max_reg_num (); ++i)
1529     reg_avail_info[i].last_bb = NULL;
1530 
1531   FOR_EACH_BB (current_bb)
1532     {
1533       rtx insn;
1534       unsigned int regno;
1535 
1536       /* First pass over the instructions records information used to
1537 	 determine when registers and memory are first and last set.  */
1538       FOR_BB_INSNS (current_bb, insn)
1539 	{
1540 	  if (!NONDEBUG_INSN_P (insn))
1541 	    continue;
1542 
1543 	  if (CALL_P (insn))
1544 	    {
1545 	      hard_reg_set_iterator hrsi;
1546 	      EXECUTE_IF_SET_IN_HARD_REG_SET (regs_invalidated_by_call,
1547 					      0, regno, hrsi)
1548 		record_last_reg_set_info (insn, regno);
1549 
1550 	      if (! RTL_CONST_OR_PURE_CALL_P (insn))
1551 		record_last_mem_set_info (insn);
1552 	    }
1553 
1554 	  note_stores (PATTERN (insn), record_last_set_info, insn);
1555 	}
1556 
1557       /* The next pass builds the hash table.  */
1558       FOR_BB_INSNS (current_bb, insn)
1559 	if (NONDEBUG_INSN_P (insn))
1560 	  hash_scan_insn (insn, table);
1561     }
1562 
1563   free (reg_avail_info);
1564   reg_avail_info = NULL;
1565 }
1566 
1567 /* Allocate space for the set/expr hash TABLE.
1568    The function's maximum insn count determines the number of buckets.  */
1569 
1570 static void
1571 alloc_hash_table (struct hash_table_d *table)
1572 {
1573   int n;
1574 
1575   n = get_max_insn_count ();
1576 
1577   table->size = n / 4;
1578   if (table->size < 11)
1579     table->size = 11;
1580 
1581   /* Attempt to maintain efficient use of hash table.
1582      Making it an odd number is simplest for now.
1583      ??? Later take some measurements.  */
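  /* A worked example (illustrative only): if get_max_insn_count () returns
     1000, table->size starts as 1000 / 4 = 250 and the "|= 1" below bumps it
     to 251 buckets; very small functions are clamped to the minimum of 11
     buckets chosen above.  */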
1584   table->size |= 1;
1585   n = table->size * sizeof (struct expr *);
1586   table->table = GNEWVAR (struct expr *, n);
1587 }
1588 
1589 /* Free things allocated by alloc_hash_table.  */
1590 
1591 static void
1592 free_hash_table (struct hash_table_d *table)
1593 {
1594   free (table->table);
1595 }
1596 
1597 /* Compute the expression hash table TABLE.  */
1598 
1599 static void
1600 compute_hash_table (struct hash_table_d *table)
1601 {
1602   /* Initialize count of number of entries in hash table.  */
1603   table->n_elems = 0;
1604   memset (table->table, 0, table->size * sizeof (struct expr *));
1605 
1606   compute_hash_table_work (table);
1607 }
1608 
1609 /* Expression tracking support.  */
1610 
1611 /* Clear canon_modify_mem_list and modify_mem_list tables.  */
1612 static void
1613 clear_modify_mem_tables (void)
1614 {
1615   unsigned i;
1616   bitmap_iterator bi;
1617 
1618   EXECUTE_IF_SET_IN_BITMAP (modify_mem_list_set, 0, i, bi)
1619     {
1620       modify_mem_list[i].release ();
1621       canon_modify_mem_list[i].release ();
1622     }
1623   bitmap_clear (modify_mem_list_set);
1624   bitmap_clear (blocks_with_calls);
1625 }
1626 
1627 /* Release memory used by modify_mem_list_set.  */
1628 
1629 static void
1630 free_modify_mem_tables (void)
1631 {
1632   clear_modify_mem_tables ();
1633   free (modify_mem_list);
1634   free (canon_modify_mem_list);
1635   modify_mem_list = 0;
1636   canon_modify_mem_list = 0;
1637 }
1638 
1639 /* For each block, compute whether X is transparent.  X is either an
1640    expression or an assignment [though we don't care which, for this context
1641    an assignment is treated as an expression].  For each block where an
1642    element of X is modified, reset the INDX bit in BMAP.  */
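/* For example (an illustrative sketch, not an exhaustive description): if X
   were (plus (reg 66) (mem (reg 67))), the code below would clear the INDX
   bit for every block containing a definition of pseudo 66, for every block
   containing a call or a store that may alias the MEM, and, after recursing
   into the MEM's address, for every block defining pseudo 67.  */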
1643 
1644 static void
1645 compute_transp (const_rtx x, int indx, sbitmap *bmap)
1646 {
1647   int i, j;
1648   enum rtx_code code;
1649   const char *fmt;
1650 
1651   /* repeat is used to turn tail-recursion into iteration since GCC
1652      can't do it when there's no return value.  */
1653  repeat:
1654 
1655   if (x == 0)
1656     return;
1657 
1658   code = GET_CODE (x);
1659   switch (code)
1660     {
1661     case REG:
1662 	{
1663 	  df_ref def;
1664 	  for (def = DF_REG_DEF_CHAIN (REGNO (x));
1665 	       def;
1666 	       def = DF_REF_NEXT_REG (def))
1667 	    bitmap_clear_bit (bmap[DF_REF_BB (def)->index], indx);
1668 	}
1669 
1670       return;
1671 
1672     case MEM:
1673       if (! MEM_READONLY_P (x))
1674 	{
1675 	  bitmap_iterator bi;
1676 	  unsigned bb_index;
1677 	  rtx x_addr;
1678 
1679 	  x_addr = get_addr (XEXP (x, 0));
1680 	  x_addr = canon_rtx (x_addr);
1681 
1682 	  /* First handle all the blocks with calls.  We don't need to
1683 	     do any list walking for them.  */
1684 	  EXECUTE_IF_SET_IN_BITMAP (blocks_with_calls, 0, bb_index, bi)
1685 	    {
1686 	      bitmap_clear_bit (bmap[bb_index], indx);
1687 	    }
1688 
1689 	  /* Now iterate over the blocks which have memory modifications
1690 	     but which do not have any calls.  */
1691 	  EXECUTE_IF_AND_COMPL_IN_BITMAP (modify_mem_list_set,
1692 					  blocks_with_calls,
1693 					  0, bb_index, bi)
1694 	    {
1695 	      vec<modify_pair> list
1696 		= canon_modify_mem_list[bb_index];
1697 	      modify_pair *pair;
1698 	      unsigned ix;
1699 
1700 	      FOR_EACH_VEC_ELT_REVERSE (list, ix, pair)
1701 		{
1702 		  rtx dest = pair->dest;
1703 		  rtx dest_addr = pair->dest_addr;
1704 
1705 		  if (canon_true_dependence (dest, GET_MODE (dest),
1706 					     dest_addr, x, x_addr))
1707 		    bitmap_clear_bit (bmap[bb_index], indx);
1708 	        }
1709 	    }
1710 	}
1711 
1712       x = XEXP (x, 0);
1713       goto repeat;
1714 
1715     case PC:
1716     case CC0: /*FIXME*/
1717     case CONST:
1718     CASE_CONST_ANY:
1719     case SYMBOL_REF:
1720     case LABEL_REF:
1721     case ADDR_VEC:
1722     case ADDR_DIFF_VEC:
1723       return;
1724 
1725     default:
1726       break;
1727     }
1728 
1729   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1730     {
1731       if (fmt[i] == 'e')
1732 	{
1733 	  /* If we are about to do the last recursive call
1734 	     needed at this level, change it into iteration.
1735 	     This function is called enough to be worth it.  */
1736 	  if (i == 0)
1737 	    {
1738 	      x = XEXP (x, i);
1739 	      goto repeat;
1740 	    }
1741 
1742 	  compute_transp (XEXP (x, i), indx, bmap);
1743 	}
1744       else if (fmt[i] == 'E')
1745 	for (j = 0; j < XVECLEN (x, i); j++)
1746 	  compute_transp (XVECEXP (x, i, j), indx, bmap);
1747     }
1748 }
1749 
1750 /* Compute PRE+LCM working variables.  */
1751 
1752 /* Local properties of expressions.  */
1753 
1754 /* Nonzero for expressions that are transparent in the block.  */
1755 static sbitmap *transp;
1756 
1757 /* Nonzero for expressions that are computed (available) in the block.  */
1758 static sbitmap *comp;
1759 
1760 /* Nonzero for expressions that are locally anticipatable in the block.  */
1761 static sbitmap *antloc;
1762 
1763 /* Nonzero for expressions where this block is an optimal computation
1764    point.  */
1765 static sbitmap *pre_optimal;
1766 
1767 /* Nonzero for expressions which are redundant in a particular block.  */
1768 static sbitmap *pre_redundant;
1769 
1770 /* Nonzero for expressions which should be inserted on a specific edge.  */
1771 static sbitmap *pre_insert_map;
1772 
1773 /* Nonzero for expressions which should be deleted in a specific block.  */
1774 static sbitmap *pre_delete_map;
1775 
1776 /* Allocate vars used for PRE analysis.  */
1777 
1778 static void
1779 alloc_pre_mem (int n_blocks, int n_exprs)
1780 {
1781   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
1782   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
1783   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
1784 
1785   pre_optimal = NULL;
1786   pre_redundant = NULL;
1787   pre_insert_map = NULL;
1788   pre_delete_map = NULL;
1789   ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
1790 
1791   /* pre_insert and pre_delete are allocated later.  */
1792 }
1793 
1794 /* Free vars used for PRE analysis.  */
1795 
1796 static void
1797 free_pre_mem (void)
1798 {
1799   sbitmap_vector_free (transp);
1800   sbitmap_vector_free (comp);
1801 
1802   /* ANTLOC and AE_KILL are freed just after pre_lcm finishes.  */
1803 
1804   if (pre_optimal)
1805     sbitmap_vector_free (pre_optimal);
1806   if (pre_redundant)
1807     sbitmap_vector_free (pre_redundant);
1808   if (pre_insert_map)
1809     sbitmap_vector_free (pre_insert_map);
1810   if (pre_delete_map)
1811     sbitmap_vector_free (pre_delete_map);
1812 
1813   transp = comp = NULL;
1814   pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
1815 }
1816 
1817 /* Remove certain expressions from anticipatable and transparent
1818    sets of basic blocks that have an incoming abnormal edge.
1819    For PRE remove potentially trapping expressions to avoid placing
1820    them on abnormal edges.  For hoisting remove memory references that
1821    can be clobbered by calls.  */
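/* Concretely (a restatement of the function body, not new behavior): a
   scratch bitmap first collects every expression that may trap (for PRE) or
   every non-constant memory reference (for hoisting); those bits are then
   cleared from the ANTLOC and TRANSP sets of each block with an incoming
   abnormal edge (for hoisting, only when that edge comes from a call).  */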
1822 
1823 static void
1824 prune_expressions (bool pre_p)
1825 {
1826   sbitmap prune_exprs;
1827   struct expr *expr;
1828   unsigned int ui;
1829   basic_block bb;
1830 
1831   prune_exprs = sbitmap_alloc (expr_hash_table.n_elems);
1832   bitmap_clear (prune_exprs);
1833   for (ui = 0; ui < expr_hash_table.size; ui++)
1834     {
1835       for (expr = expr_hash_table.table[ui]; expr; expr = expr->next_same_hash)
1836 	{
1837 	  /* Note potentially trapping expressions.  */
1838 	  if (may_trap_p (expr->expr))
1839 	    {
1840 	      bitmap_set_bit (prune_exprs, expr->bitmap_index);
1841 	      continue;
1842 	    }
1843 
1844 	  if (!pre_p && MEM_P (expr->expr))
1845 	    /* Note memory references that can be clobbered by a call.
1846 	       We do not split abnormal edges in hoisting, so if a memory
1847 	       reference were hoisted along an abnormal edge, it would be
1848 	       placed /before/ the call.  Therefore, only
1849 	       constant memory references can be hoisted along abnormal
1850 	       edges.  */
1851 	    {
1852 	      if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
1853 		  && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
1854 		continue;
1855 
1856 	      if (MEM_READONLY_P (expr->expr)
1857 		  && !MEM_VOLATILE_P (expr->expr)
1858 		  && MEM_NOTRAP_P (expr->expr))
1859 		/* Constant memory reference, e.g., a PIC address.  */
1860 		continue;
1861 
1862 	      /* ??? Optimally, we would use interprocedural alias
1863 		 analysis to determine if this mem is actually killed
1864 		 by this call.  */
1865 
1866 	      bitmap_set_bit (prune_exprs, expr->bitmap_index);
1867 	    }
1868 	}
1869     }
1870 
1871   FOR_EACH_BB (bb)
1872     {
1873       edge e;
1874       edge_iterator ei;
1875 
1876       /* If the current block is the destination of an abnormal edge, we
1877 	 kill all trapping (for PRE) and memory (for hoist) expressions
1878 	 because we won't be able to properly place the instruction on
1879 	 the edge.  So make them neither anticipatable nor transparent.
1880 	 This is fairly conservative.
1881 
1882 	 ??? For hoisting it may be necessary to check for set-and-jump
1883 	 instructions here, not just for abnormal edges.  The general problem
1884 	 is that when an expression cannot be placed right at the end of
1885 	 a basic block, we should account for any side-effects of a subsequent
1886 	 jump instruction that could clobber the expression.  It would
1887 	 be best to implement this check along the lines of
1888 	 should_hoist_expr_to_dom where the target block is already known
1889 	 and, hence, there's no need to conservatively prune expressions on
1890 	 "intermediate" set-and-jump instructions.  */
1891       FOR_EACH_EDGE (e, ei, bb->preds)
1892 	if ((e->flags & EDGE_ABNORMAL)
1893 	    && (pre_p || CALL_P (BB_END (e->src))))
1894 	  {
1895 	    bitmap_and_compl (antloc[bb->index],
1896 				antloc[bb->index], prune_exprs);
1897 	    bitmap_and_compl (transp[bb->index],
1898 				transp[bb->index], prune_exprs);
1899 	    break;
1900 	  }
1901     }
1902 
1903   sbitmap_free (prune_exprs);
1904 }
1905 
1906 /* It may be necessary to insert a large number of insns on edges to
1907    make the existing occurrences of expressions fully redundant.  This
1908    routine examines the set of insertions and deletions and if the ratio
1909    of insertions to deletions is too high for a particular expression, then
1910    the expression is removed from the insertion/deletion sets.
1911 
1912    N_ELEMS is the number of elements in the hash table.  */
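/* As a hedged illustration of the heuristic: if an expression would have to
   be inserted on many edges (a large insertions[j]) while allowing only a
   single evaluation to be deleted (deletions[j] == 1), the ratio test below
   exceeds MAX_GCSE_INSERTION_RATIO and the expression is dropped from both
   pre_insert_map and pre_delete_map.  */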
1913 
1914 static void
1915 prune_insertions_deletions (int n_elems)
1916 {
1917   sbitmap_iterator sbi;
1918   sbitmap prune_exprs;
1919 
1920   /* We always use I to iterate over blocks/edges and J to iterate over
1921      expressions.  */
1922   unsigned int i, j;
1923 
1924   /* Counts for the number of times an expression needs to be inserted and
1925      number of times an expression can be removed as a result.  */
1926   int *insertions = GCNEWVEC (int, n_elems);
1927   int *deletions = GCNEWVEC (int, n_elems);
1928 
1929   /* Set of expressions which require too many insertions relative to
1930      the number of deletions achieved.  We will prune these out of the
1931      insertion/deletion sets.  */
1932   prune_exprs = sbitmap_alloc (n_elems);
1933   bitmap_clear (prune_exprs);
1934 
1935   /* Iterate over the edges counting the number of times each expression
1936      needs to be inserted.  */
1937   for (i = 0; i < (unsigned) n_edges; i++)
1938     {
1939       EXECUTE_IF_SET_IN_BITMAP (pre_insert_map[i], 0, j, sbi)
1940 	insertions[j]++;
1941     }
1942 
1943   /* Similarly for deletions, but those occur in blocks rather than on
1944      edges.  */
1945   for (i = 0; i < (unsigned) last_basic_block; i++)
1946     {
1947       EXECUTE_IF_SET_IN_BITMAP (pre_delete_map[i], 0, j, sbi)
1948 	deletions[j]++;
1949     }
1950 
1951   /* Now that we have accurate counts, iterate over the elements in the
1952      hash table and see if any need too many insertions relative to the
1953      number of evaluations that can be removed.  If so, mark them in
1954      PRUNE_EXPRS.  */
1955   for (j = 0; j < (unsigned) n_elems; j++)
1956     if (deletions[j]
1957 	&& ((unsigned) insertions[j] / deletions[j]) > MAX_GCSE_INSERTION_RATIO)
1958       bitmap_set_bit (prune_exprs, j);
1959 
1960   /* Now prune PRE_INSERT_MAP and PRE_DELETE_MAP based on PRUNE_EXPRS.  */
1961   EXECUTE_IF_SET_IN_BITMAP (prune_exprs, 0, j, sbi)
1962     {
1963       for (i = 0; i < (unsigned) n_edges; i++)
1964 	bitmap_clear_bit (pre_insert_map[i], j);
1965 
1966       for (i = 0; i < (unsigned) last_basic_block; i++)
1967 	bitmap_clear_bit (pre_delete_map[i], j);
1968     }
1969 
1970   sbitmap_free (prune_exprs);
1971   free (insertions);
1972   free (deletions);
1973 }
1974 
1975 /* Top level routine to do the dataflow analysis needed by PRE.  */
1976 
1977 static struct edge_list *
1978 compute_pre_data (void)
1979 {
1980   struct edge_list *edge_list;
1981   basic_block bb;
1982 
1983   compute_local_properties (transp, comp, antloc, &expr_hash_table);
1984   prune_expressions (true);
1985   bitmap_vector_clear (ae_kill, last_basic_block);
1986 
1987   /* Compute ae_kill for each basic block using:
1988 
1989      ~(TRANSP | COMP)
1990   */
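  /* Equivalently: an expression is killed in BB exactly when it is neither
     transparent through BB nor computed in BB.  */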
1991 
1992   FOR_EACH_BB (bb)
1993     {
1994       bitmap_ior (ae_kill[bb->index], transp[bb->index], comp[bb->index]);
1995       bitmap_not (ae_kill[bb->index], ae_kill[bb->index]);
1996     }
1997 
1998   edge_list = pre_edge_lcm (expr_hash_table.n_elems, transp, comp, antloc,
1999 			    ae_kill, &pre_insert_map, &pre_delete_map);
2000   sbitmap_vector_free (antloc);
2001   antloc = NULL;
2002   sbitmap_vector_free (ae_kill);
2003   ae_kill = NULL;
2004 
2005   prune_insertions_deletions (expr_hash_table.n_elems);
2006 
2007   return edge_list;
2008 }
2009 
2010 /* PRE utilities */
2011 
2012 /* Return nonzero if an occurrence of expression EXPR in OCCR_BB would reach
2013    block BB.
2014 
2015    VISITED is a pointer to a working buffer for tracking which BB's have
2016    been visited.  It is NULL for the top-level call.
2017 
2018    We treat reaching expressions that go through blocks containing the same
2019    reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
2020    2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2021    2 as not reaching.  The intent is to improve the probability of finding
2022    only one reaching expression and to reduce register lifetimes by picking
2023    the closest such expression.  */
2024 
2025 static int
2026 pre_expr_reaches_here_p_work (basic_block occr_bb, struct expr *expr,
2027 			      basic_block bb, char *visited)
2028 {
2029   edge pred;
2030   edge_iterator ei;
2031 
2032   FOR_EACH_EDGE (pred, ei, bb->preds)
2033     {
2034       basic_block pred_bb = pred->src;
2035 
2036       if (pred->src == ENTRY_BLOCK_PTR
2037 	  /* Has this predecessor already been visited?  */
2038 	  || visited[pred_bb->index])
2039 	;/* Nothing to do.  */
2040 
2041       /* Does this predecessor generate this expression?  */
2042       else if (bitmap_bit_p (comp[pred_bb->index], expr->bitmap_index))
2043 	{
2044 	  /* Is this the occurrence we're looking for?
2045 	     Note that there's only one generating occurrence per block
2046 	     so we just need to check the block number.  */
2047 	  if (occr_bb == pred_bb)
2048 	    return 1;
2049 
2050 	  visited[pred_bb->index] = 1;
2051 	}
2052       /* Ignore this predecessor if it kills the expression.  */
2053       else if (! bitmap_bit_p (transp[pred_bb->index], expr->bitmap_index))
2054 	visited[pred_bb->index] = 1;
2055 
2056       /* Neither gen nor kill.  */
2057       else
2058 	{
2059 	  visited[pred_bb->index] = 1;
2060 	  if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
2061 	    return 1;
2062 	}
2063     }
2064 
2065   /* All paths have been checked.  */
2066   return 0;
2067 }
2068 
2069 /* The wrapper for pre_expr_reaches_here_work that ensures that any
2070    memory allocated for that function is returned.  */
2071 
2072 static int
2073 pre_expr_reaches_here_p (basic_block occr_bb, struct expr *expr, basic_block bb)
2074 {
2075   int rval;
2076   char *visited = XCNEWVEC (char, last_basic_block);
2077 
2078   rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited);
2079 
2080   free (visited);
2081   return rval;
2082 }
2083 
2084 /* Generate RTL to copy an EXPR to its `reaching_reg' and return it.  */
2085 
2086 static rtx
2087 process_insert_insn (struct expr *expr)
2088 {
2089   rtx reg = expr->reaching_reg;
2090   /* Copy the expression to make sure we don't have any sharing issues.  */
2091   rtx exp = copy_rtx (expr->expr);
2092   rtx pat;
2093 
2094   start_sequence ();
2095 
2096   /* If the expression is something that's an operand, like a constant,
2097      just copy it to a register.  */
2098   if (general_operand (exp, GET_MODE (reg)))
2099     emit_move_insn (reg, exp);
2100 
2101   /* Otherwise, make a new insn to compute this expression and make sure the
2102      insn will be recognized (this also adds any needed CLOBBERs).  */
2103   else
2104     {
2105       rtx insn = emit_insn (gen_rtx_SET (VOIDmode, reg, exp));
2106 
2107       if (insn_invalid_p (insn, false))
2108 	gcc_unreachable ();
2109     }
2110 
2111   pat = get_insns ();
2112   end_sequence ();
2113 
2114   return pat;
2115 }
2116 
2117 /* Add EXPR to the end of basic block BB.
2118 
2119    This is used by both PRE and code hoisting.  */
2120 
2121 static void
2122 insert_insn_end_basic_block (struct expr *expr, basic_block bb)
2123 {
2124   rtx insn = BB_END (bb);
2125   rtx new_insn;
2126   rtx reg = expr->reaching_reg;
2127   int regno = REGNO (reg);
2128   rtx pat, pat_end;
2129 
2130   pat = process_insert_insn (expr);
2131   gcc_assert (pat && INSN_P (pat));
2132 
2133   pat_end = pat;
2134   while (NEXT_INSN (pat_end) != NULL_RTX)
2135     pat_end = NEXT_INSN (pat_end);
2136 
2137   /* If the last insn is a jump, insert EXPR in front [taking care to
2138      handle cc0, etc. properly].  Similarly we need to take care of trapping
2139      instructions in the presence of non-call exceptions.  */
2140 
2141   if (JUMP_P (insn)
2142       || (NONJUMP_INSN_P (insn)
2143 	  && (!single_succ_p (bb)
2144 	      || single_succ_edge (bb)->flags & EDGE_ABNORMAL)))
2145     {
2146 #ifdef HAVE_cc0
2147       rtx note;
2148 #endif
2149 
2150       /* If this is a jump table, then we can't insert stuff here.  Since
2151 	 we know the previous real insn must be the tablejump, we insert
2152 	 the new instruction just before the tablejump.  */
2153       if (GET_CODE (PATTERN (insn)) == ADDR_VEC
2154 	  || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
2155 	insn = prev_active_insn (insn);
2156 
2157 #ifdef HAVE_cc0
2158       /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
2159 	 if cc0 isn't set.  */
2160       note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
2161       if (note)
2162 	insn = XEXP (note, 0);
2163       else
2164 	{
2165 	  rtx maybe_cc0_setter = prev_nonnote_insn (insn);
2166 	  if (maybe_cc0_setter
2167 	      && INSN_P (maybe_cc0_setter)
2168 	      && sets_cc0_p (PATTERN (maybe_cc0_setter)))
2169 	    insn = maybe_cc0_setter;
2170 	}
2171 #endif
2172       /* FIXME: What if something in cc0/jump uses value set in new insn?  */
2173       new_insn = emit_insn_before_noloc (pat, insn, bb);
2174     }
2175 
2176   /* Likewise if the last insn is a call, as will happen in the presence
2177      of exception handling.  */
2178   else if (CALL_P (insn)
2179 	   && (!single_succ_p (bb)
2180 	       || single_succ_edge (bb)->flags & EDGE_ABNORMAL))
2181     {
2182       /* Keeping in mind targets with small register classes and parameters
2183          in registers, we search backward and place the instructions before
2184 	 the first parameter is loaded.  Do this for everyone for consistency
2185 	 and a presumption that we'll get better code elsewhere as well.  */
2186 
2187       /* Since different machines initialize their parameter registers
2188 	 in different orders, assume nothing.  Collect the set of all
2189 	 parameter registers.  */
2190       insn = find_first_parameter_load (insn, BB_HEAD (bb));
2191 
2192       /* If we found all the parameter loads, then we want to insert
2193 	 before the first parameter load.
2194 
2195 	 If we did not find all the parameter loads, then we might have
2196 	 stopped on the head of the block, which could be a CODE_LABEL.
2197 	 If we inserted before the CODE_LABEL, then we would be putting
2198 	 the insn in the wrong basic block.  In that case, put the insn
2199 	 after the CODE_LABEL.  Also, respect NOTE_INSN_BASIC_BLOCK.  */
2200       while (LABEL_P (insn)
2201 	     || NOTE_INSN_BASIC_BLOCK_P (insn))
2202 	insn = NEXT_INSN (insn);
2203 
2204       new_insn = emit_insn_before_noloc (pat, insn, bb);
2205     }
2206   else
2207     new_insn = emit_insn_after_noloc (pat, insn, bb);
2208 
2209   while (1)
2210     {
2211       if (INSN_P (pat))
2212 	add_label_notes (PATTERN (pat), new_insn);
2213       if (pat == pat_end)
2214 	break;
2215       pat = NEXT_INSN (pat);
2216     }
2217 
2218   gcse_create_count++;
2219 
2220   if (dump_file)
2221     {
2222       fprintf (dump_file, "PRE/HOIST: end of bb %d, insn %d, ",
2223 	       bb->index, INSN_UID (new_insn));
2224       fprintf (dump_file, "copying expression %d to reg %d\n",
2225 	       expr->bitmap_index, regno);
2226     }
2227 }
2228 
2229 /* Insert partially redundant expressions on edges in the CFG to make
2230    the expressions fully redundant.  */
2231 
2232 static int
2233 pre_edge_insert (struct edge_list *edge_list, struct expr **index_map)
2234 {
2235   int e, i, j, num_edges, set_size, did_insert = 0;
2236   sbitmap *inserted;
2237 
2238   /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
2239      if it reaches any of the deleted expressions.  */
2240 
2241   set_size = pre_insert_map[0]->size;
2242   num_edges = NUM_EDGES (edge_list);
2243   inserted = sbitmap_vector_alloc (num_edges, expr_hash_table.n_elems);
2244   bitmap_vector_clear (inserted, num_edges);
2245 
2246   for (e = 0; e < num_edges; e++)
2247     {
2248       int indx;
2249       basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e);
2250 
2251       for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
2252 	{
2253 	  SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
2254 
2255 	  for (j = indx;
2256 	       insert && j < (int) expr_hash_table.n_elems;
2257 	       j++, insert >>= 1)
2258 	    if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
2259 	      {
2260 		struct expr *expr = index_map[j];
2261 		struct occr *occr;
2262 
2263 		/* Now look at each deleted occurrence of this expression.  */
2264 		for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2265 		  {
2266 		    if (! occr->deleted_p)
2267 		      continue;
2268 
2269 		    /* Insert this expression on this edge if it would
2270 		       reach the deleted occurrence in BB.  */
2271 		    if (!bitmap_bit_p (inserted[e], j))
2272 		      {
2273 			rtx insn;
2274 			edge eg = INDEX_EDGE (edge_list, e);
2275 
2276 			/* We can't insert anything on an abnormal and
2277 			   critical edge, so we insert the insn at the end of
2278 			   the previous block. There are several alternatives
2279 			   detailed in Morgan's book, P277 (sec 10.5), for
2280 			   handling this situation.  This one is easiest for
2281 			   now.  */
2282 
2283 			if (eg->flags & EDGE_ABNORMAL)
2284 			  insert_insn_end_basic_block (index_map[j], bb);
2285 			else
2286 			  {
2287 			    insn = process_insert_insn (index_map[j]);
2288 			    insert_insn_on_edge (insn, eg);
2289 			  }
2290 
2291 			if (dump_file)
2292 			  {
2293 			    fprintf (dump_file, "PRE: edge (%d,%d), ",
2294 				     bb->index,
2295 				     INDEX_EDGE_SUCC_BB (edge_list, e)->index);
2296 			    fprintf (dump_file, "copy expression %d\n",
2297 				     expr->bitmap_index);
2298 			  }
2299 
2300 			update_ld_motion_stores (expr);
2301 			bitmap_set_bit (inserted[e], j);
2302 			did_insert = 1;
2303 			gcse_create_count++;
2304 		      }
2305 		  }
2306 	      }
2307 	}
2308     }
2309 
2310   sbitmap_vector_free (inserted);
2311   return did_insert;
2312 }
2313 
2314 /* Copy the result of EXPR->EXPR generated by INSN to EXPR->REACHING_REG.
2315    Given "old_reg <- expr" (INSN), instead of adding after it
2316      reaching_reg <- old_reg
2317    it's better to do the following:
2318      reaching_reg <- expr
2319      old_reg      <- reaching_reg
2320    because this way copy propagation can discover additional PRE
2321    opportunities.  But if this fails, we try the old way.
2322    When "expr" is a store, i.e.
2323    given "MEM <- old_reg", instead of adding after it
2324      reaching_reg <- old_reg
2325    it's better to add it before as follows:
2326      reaching_reg <- old_reg
2327      MEM          <- reaching_reg.  */
2328 
2329 static void
2330 pre_insert_copy_insn (struct expr *expr, rtx insn)
2331 {
2332   rtx reg = expr->reaching_reg;
2333   int regno = REGNO (reg);
2334   int indx = expr->bitmap_index;
2335   rtx pat = PATTERN (insn);
2336   rtx set, first_set, new_insn;
2337   rtx old_reg;
2338   int i;
2339 
2340   /* This block matches the logic in hash_scan_insn.  */
2341   switch (GET_CODE (pat))
2342     {
2343     case SET:
2344       set = pat;
2345       break;
2346 
2347     case PARALLEL:
2348       /* Search through the parallel looking for the set whose
2349 	 source was the expression that we're interested in.  */
2350       first_set = NULL_RTX;
2351       set = NULL_RTX;
2352       for (i = 0; i < XVECLEN (pat, 0); i++)
2353 	{
2354 	  rtx x = XVECEXP (pat, 0, i);
2355 	  if (GET_CODE (x) == SET)
2356 	    {
2357 	      /* If the source was a REG_EQUAL or REG_EQUIV note, we
2358 		 may not find an equivalent expression, but in this
2359 		 case the PARALLEL will have a single set.  */
2360 	      if (first_set == NULL_RTX)
2361 		first_set = x;
2362 	      if (expr_equiv_p (SET_SRC (x), expr->expr))
2363 	        {
2364 	          set = x;
2365 	          break;
2366 	        }
2367 	    }
2368 	}
2369 
2370       gcc_assert (first_set);
2371       if (set == NULL_RTX)
2372         set = first_set;
2373       break;
2374 
2375     default:
2376       gcc_unreachable ();
2377     }
2378 
2379   if (REG_P (SET_DEST (set)))
2380     {
2381       old_reg = SET_DEST (set);
2382       /* Check if we can modify the set destination in the original insn.  */
2383       if (validate_change (insn, &SET_DEST (set), reg, 0))
2384         {
2385           new_insn = gen_move_insn (old_reg, reg);
2386           new_insn = emit_insn_after (new_insn, insn);
2387         }
2388       else
2389         {
2390           new_insn = gen_move_insn (reg, old_reg);
2391           new_insn = emit_insn_after (new_insn, insn);
2392         }
2393     }
2394   else /* This is possible only in case of a store to memory.  */
2395     {
2396       old_reg = SET_SRC (set);
2397       new_insn = gen_move_insn (reg, old_reg);
2398 
2399       /* Check if we can modify the set source in the original insn.  */
2400       if (validate_change (insn, &SET_SRC (set), reg, 0))
2401         new_insn = emit_insn_before (new_insn, insn);
2402       else
2403         new_insn = emit_insn_after (new_insn, insn);
2404     }
2405 
2406   gcse_create_count++;
2407 
2408   if (dump_file)
2409     fprintf (dump_file,
2410 	     "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
2411 	      BLOCK_FOR_INSN (insn)->index, INSN_UID (new_insn), indx,
2412 	      INSN_UID (insn), regno);
2413 }
2414 
2415 /* Copy available expressions that reach the redundant expression
2416    to `reaching_reg'.  */
2417 
2418 static void
2419 pre_insert_copies (void)
2420 {
2421   unsigned int i, added_copy;
2422   struct expr *expr;
2423   struct occr *occr;
2424   struct occr *avail;
2425 
2426   /* For each available expression in the table, copy the result to
2427      `reaching_reg' if the expression reaches a deleted one.
2428 
2429      ??? The current algorithm is rather brute force.
2430      Need to do some profiling.  */
2431 
2432   for (i = 0; i < expr_hash_table.size; i++)
2433     for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2434       {
2435 	/* If the basic block isn't reachable, PPOUT will be TRUE.  However,
2436 	   we don't want to insert a copy here because the expression may not
2437 	   really be redundant.  So only insert an insn if the expression was
2438 	   deleted.  This test also avoids further processing if the
2439 	   expression wasn't deleted anywhere.  */
2440 	if (expr->reaching_reg == NULL)
2441 	  continue;
2442 
2443 	/* Set when we add a copy for that expression.  */
2444 	added_copy = 0;
2445 
2446 	for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2447 	  {
2448 	    if (! occr->deleted_p)
2449 	      continue;
2450 
2451 	    for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
2452 	      {
2453 		rtx insn = avail->insn;
2454 
2455 		/* No need to handle this one if handled already.  */
2456 		if (avail->copied_p)
2457 		  continue;
2458 
2459 		/* Don't handle this one if it's a redundant one.  */
2460 		if (INSN_DELETED_P (insn))
2461 		  continue;
2462 
2463 		/* Or if the expression doesn't reach the deleted one.  */
2464 		if (! pre_expr_reaches_here_p (BLOCK_FOR_INSN (avail->insn),
2465 					       expr,
2466 					       BLOCK_FOR_INSN (occr->insn)))
2467 		  continue;
2468 
2469                 added_copy = 1;
2470 
2471 		/* Copy the result of avail to reaching_reg.  */
2472 		pre_insert_copy_insn (expr, insn);
2473 		avail->copied_p = 1;
2474 	      }
2475 	  }
2476 
2477 	  if (added_copy)
2478             update_ld_motion_stores (expr);
2479       }
2480 }
2481 
2482 /* Emit move from SRC to DEST noting the equivalence with expression computed
2483    in INSN.  */
2484 
2485 static rtx
2486 gcse_emit_move_after (rtx dest, rtx src, rtx insn)
2487 {
2488   rtx new_rtx;
2489   rtx set = single_set (insn), set2;
2490   rtx note;
2491   rtx eqv = NULL_RTX;
2492 
2493   /* This should never fail since we're creating a reg->reg copy
2494      we've verified to be valid.  */
2495 
2496   new_rtx = emit_insn_after (gen_move_insn (dest, src), insn);
2497 
2498   /* Note the equivalence for local CSE pass.  Take the note from the old
2499      set if there was one.  Otherwise record the SET_SRC from the old set
2500      unless DEST is also an operand of the SET_SRC.  */
2501   set2 = single_set (new_rtx);
2502   if (!set2 || !rtx_equal_p (SET_DEST (set2), dest))
2503     return new_rtx;
2504   if ((note = find_reg_equal_equiv_note (insn)))
2505     eqv = XEXP (note, 0);
2506   else if (! REG_P (dest)
2507 	   || ! reg_mentioned_p (dest, SET_SRC (set)))
2508     eqv = SET_SRC (set);
2509 
2510   if (eqv != NULL_RTX)
2511     set_unique_reg_note (new_rtx, REG_EQUAL, copy_insn_1 (eqv));
2512 
2513   return new_rtx;
2514 }
2515 
2516 /* Delete redundant computations.
2517    Deletion is done by changing the insn to copy the `reaching_reg' of
2518    the expression into the result of the SET.  It is left to later passes
2519    (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
2520 
2521    Return nonzero if a change is made.  */
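/* As an illustrative sketch (the pseudo register numbers are made up):
   a redundant computation such as
     (set (reg 105) (plus (reg 103) (reg 104)))
   is deleted and replaced by the copy
     (set (reg 105) (reg 140))
   where reg 140 is the expression's `reaching_reg'; the insertions made by
   pre_edge_insert and pre_insert_copies guarantee that reg 140 holds the
   value of the expression on every path into this point.  */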
2522 
2523 static int
2524 pre_delete (void)
2525 {
2526   unsigned int i;
2527   int changed;
2528   struct expr *expr;
2529   struct occr *occr;
2530 
2531   changed = 0;
2532   for (i = 0; i < expr_hash_table.size; i++)
2533     for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2534       {
2535 	int indx = expr->bitmap_index;
2536 
2537 	/* We only need to search antic_occr since we require ANTLOC != 0.  */
2538 	for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2539 	  {
2540 	    rtx insn = occr->insn;
2541 	    rtx set;
2542 	    basic_block bb = BLOCK_FOR_INSN (insn);
2543 
2544 	    /* We only delete insns that have a single_set.  */
2545 	    if (bitmap_bit_p (pre_delete_map[bb->index], indx)
2546 		&& (set = single_set (insn)) != 0
2547                 && dbg_cnt (pre_insn))
2548 	      {
2549 		/* Create a pseudo-reg to store the result of reaching
2550 		   expressions into.  Get the mode for the new pseudo from
2551 		   the mode of the original destination pseudo.  */
2552 		if (expr->reaching_reg == NULL)
2553 		  expr->reaching_reg = gen_reg_rtx_and_attrs (SET_DEST (set));
2554 
2555 		gcse_emit_move_after (SET_DEST (set), expr->reaching_reg, insn);
2556 		delete_insn (insn);
2557 		occr->deleted_p = 1;
2558 		changed = 1;
2559 		gcse_subst_count++;
2560 
2561 		if (dump_file)
2562 		  {
2563 		    fprintf (dump_file,
2564 			     "PRE: redundant insn %d (expression %d) in ",
2565 			       INSN_UID (insn), indx);
2566 		    fprintf (dump_file, "bb %d, reaching reg is %d\n",
2567 			     bb->index, REGNO (expr->reaching_reg));
2568 		  }
2569 	      }
2570 	  }
2571       }
2572 
2573   return changed;
2574 }
2575 
2576 /* Perform GCSE optimizations using PRE.
2577    This is called by one_pre_gcse_pass after all the dataflow analysis
2578    has been done.
2579 
2580    This is based on the original Morel-Renvoise paper, Fred Chow's thesis, and
2581    lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
2582    Compiler Design and Implementation.
2583 
2584    ??? A new pseudo reg is created to hold the reaching expression.  The nice
2585    thing about the classical approach is that it would try to use an existing
2586    reg.  If the register can't be adequately optimized [i.e. we introduce
2587    reload problems], one could add a pass here to propagate the new register
2588    through the block.
2589 
2590    ??? We don't handle single sets in PARALLELs because we're [currently] not
2591    able to copy the rest of the parallel when we insert copies to create full
2592    redundancies from partial redundancies.  However, there's no reason why we
2593    can't handle PARALLELs in the cases where there are no partial
2594    redundancies.  */
2595 
2596 static int
2597 pre_gcse (struct edge_list *edge_list)
2598 {
2599   unsigned int i;
2600   int did_insert, changed;
2601   struct expr **index_map;
2602   struct expr *expr;
2603 
2604   /* Compute a mapping from expression number (`bitmap_index') to
2605      hash table entry.  */
2606 
2607   index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
2608   for (i = 0; i < expr_hash_table.size; i++)
2609     for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2610       index_map[expr->bitmap_index] = expr;
2611 
2612   /* Delete the redundant insns first so that
2613      - we know what register to use for the new insns and for the other
2614        ones with reaching expressions
2615      - we know which insns are redundant when we go to create copies  */
2616 
2617   changed = pre_delete ();
2618   did_insert = pre_edge_insert (edge_list, index_map);
2619 
2620   /* In other places with reaching expressions, copy the expression to the
2621      specially allocated pseudo-reg that reaches the redundant expr.  */
2622   pre_insert_copies ();
2623   if (did_insert)
2624     {
2625       commit_edge_insertions ();
2626       changed = 1;
2627     }
2628 
2629   free (index_map);
2630   return changed;
2631 }
2632 
2633 /* Top level routine to perform one PRE GCSE pass.
2634 
2635    Return nonzero if a change was made.  */
2636 
2637 static int
2638 one_pre_gcse_pass (void)
2639 {
2640   int changed = 0;
2641 
2642   gcse_subst_count = 0;
2643   gcse_create_count = 0;
2644 
2645   /* Return if there's nothing to do, or it is too expensive.  */
2646   if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
2647       || is_too_expensive (_("PRE disabled")))
2648     return 0;
2649 
2650   /* We need alias.  */
2651   init_alias_analysis ();
2652 
2653   bytes_used = 0;
2654   gcc_obstack_init (&gcse_obstack);
2655   alloc_gcse_mem ();
2656 
2657   alloc_hash_table (&expr_hash_table);
2658   add_noreturn_fake_exit_edges ();
2659   if (flag_gcse_lm)
2660     compute_ld_motion_mems ();
2661 
2662   compute_hash_table (&expr_hash_table);
2663   if (flag_gcse_lm)
2664     trim_ld_motion_mems ();
2665   if (dump_file)
2666     dump_hash_table (dump_file, "Expression", &expr_hash_table);
2667 
2668   if (expr_hash_table.n_elems > 0)
2669     {
2670       struct edge_list *edge_list;
2671       alloc_pre_mem (last_basic_block, expr_hash_table.n_elems);
2672       edge_list = compute_pre_data ();
2673       changed |= pre_gcse (edge_list);
2674       free_edge_list (edge_list);
2675       free_pre_mem ();
2676     }
2677 
2678   if (flag_gcse_lm)
2679     free_ld_motion_mems ();
2680   remove_fake_exit_edges ();
2681   free_hash_table (&expr_hash_table);
2682 
2683   free_gcse_mem ();
2684   obstack_free (&gcse_obstack, NULL);
2685 
2686   /* We are finished with alias.  */
2687   end_alias_analysis ();
2688 
2689   if (dump_file)
2690     {
2691       fprintf (dump_file, "PRE GCSE of %s, %d basic blocks, %d bytes needed, ",
2692 	       current_function_name (), n_basic_blocks, bytes_used);
2693       fprintf (dump_file, "%d substs, %d insns created\n",
2694 	       gcse_subst_count, gcse_create_count);
2695     }
2696 
2697   return changed;
2698 }
2699 
2700 /* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them
2701    to INSN.  If such notes are added to an insn which references a
2702    CODE_LABEL, the LABEL_NUSES count is incremented.  We have to add
2703    these notes, because the following loop optimization pass requires
2704    them.  */
2705 
2706 /* ??? If there was a jump optimization pass after gcse and before loop,
2707    then we would not need to do this here, because jump would add the
2708    necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes.  */
2709 
2710 static void
2711 add_label_notes (rtx x, rtx insn)
2712 {
2713   enum rtx_code code = GET_CODE (x);
2714   int i, j;
2715   const char *fmt;
2716 
2717   if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
2718     {
2719       /* This code used to ignore labels that referred to dispatch tables to
2720 	 avoid flow generating (slightly) worse code.
2721 
2722 	 We no longer ignore such label references (see LABEL_REF handling in
2723 	 mark_jump_label for additional information).  */
2724 
2725       /* There's no reason for current users to emit jump-insns with
2726 	 such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET
2727 	 notes.  */
2728       gcc_assert (!JUMP_P (insn));
2729       add_reg_note (insn, REG_LABEL_OPERAND, XEXP (x, 0));
2730 
2731       if (LABEL_P (XEXP (x, 0)))
2732 	LABEL_NUSES (XEXP (x, 0))++;
2733 
2734       return;
2735     }
2736 
2737   for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2738     {
2739       if (fmt[i] == 'e')
2740 	add_label_notes (XEXP (x, i), insn);
2741       else if (fmt[i] == 'E')
2742 	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
2743 	  add_label_notes (XVECEXP (x, i, j), insn);
2744     }
2745 }
2746 
2747 /* Code Hoisting variables and subroutines.  */
2748 
2749 /* Very busy expressions.  */
2750 static sbitmap *hoist_vbein;
2751 static sbitmap *hoist_vbeout;
2752 
2753 /* ??? We could compute post dominators and run this algorithm in
2754    reverse to perform tail merging, doing so would probably be
2755    more effective than the tail merging code in jump.c.
2756 
2757    It's unclear if tail merging could be run in parallel with
2758    code hoisting.  It would be nice.  */
2759 
2760 /* Allocate vars used for code hoisting analysis.  */
2761 
2762 static void
2763 alloc_code_hoist_mem (int n_blocks, int n_exprs)
2764 {
2765   antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
2766   transp = sbitmap_vector_alloc (n_blocks, n_exprs);
2767   comp = sbitmap_vector_alloc (n_blocks, n_exprs);
2768 
2769   hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
2770   hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
2771 }
2772 
2773 /* Free vars used for code hoisting analysis.  */
2774 
2775 static void
2776 free_code_hoist_mem (void)
2777 {
2778   sbitmap_vector_free (antloc);
2779   sbitmap_vector_free (transp);
2780   sbitmap_vector_free (comp);
2781 
2782   sbitmap_vector_free (hoist_vbein);
2783   sbitmap_vector_free (hoist_vbeout);
2784 
2785   free_dominance_info (CDI_DOMINATORS);
2786 }
2787 
2788 /* Compute the very busy expressions at entry/exit from each block.
2789 
2790    An expression is very busy if all paths from a given point
2791    compute the expression.  */
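/* Restating the loop below as dataflow equations (no new semantics):

     VBEout (BB) = COMP (BB) | (intersection of VBEin over BB's successors)
     VBEin  (BB) = ANTLOC (BB) | (VBEout (BB) & TRANSP (BB))  */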
2792 
2793 static void
2794 compute_code_hoist_vbeinout (void)
2795 {
2796   int changed, passes;
2797   basic_block bb;
2798 
2799   bitmap_vector_clear (hoist_vbeout, last_basic_block);
2800   bitmap_vector_clear (hoist_vbein, last_basic_block);
2801 
2802   passes = 0;
2803   changed = 1;
2804 
2805   while (changed)
2806     {
2807       changed = 0;
2808 
2809       /* We scan the blocks in reverse order to speed up
2810 	 convergence.  */
2811       FOR_EACH_BB_REVERSE (bb)
2812 	{
2813 	  if (bb->next_bb != EXIT_BLOCK_PTR)
2814 	    {
2815 	      bitmap_intersection_of_succs (hoist_vbeout[bb->index],
2816 					    hoist_vbein, bb);
2817 
2818 	      /* Include expressions in VBEout that are calculated
2819 		 in BB and available at its end.  */
2820 	      bitmap_ior (hoist_vbeout[bb->index],
2821 			      hoist_vbeout[bb->index], comp[bb->index]);
2822 	    }
2823 
2824 	  changed |= bitmap_or_and (hoist_vbein[bb->index],
2825 					      antloc[bb->index],
2826 					      hoist_vbeout[bb->index],
2827 					      transp[bb->index]);
2828 	}
2829 
2830       passes++;
2831     }
2832 
2833   if (dump_file)
2834     {
2835       fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes);
2836 
2837       FOR_EACH_BB (bb)
2838         {
2839 	  fprintf (dump_file, "vbein (%d): ", bb->index);
2840 	  dump_bitmap_file (dump_file, hoist_vbein[bb->index]);
2841 	  fprintf (dump_file, "vbeout(%d): ", bb->index);
2842 	  dump_bitmap_file (dump_file, hoist_vbeout[bb->index]);
2843 	}
2844     }
2845 }
2846 
2847 /* Top level routine to do the dataflow analysis needed by code hoisting.  */
2848 
2849 static void
2850 compute_code_hoist_data (void)
2851 {
2852   compute_local_properties (transp, comp, antloc, &expr_hash_table);
2853   prune_expressions (false);
2854   compute_code_hoist_vbeinout ();
2855   calculate_dominance_info (CDI_DOMINATORS);
2856   if (dump_file)
2857     fprintf (dump_file, "\n");
2858 }
2859 
2860 /* Update register pressure for BB when hoisting an expression from
2861    instruction FROM, if the live ranges of its inputs are shrunk.  Also
2862    maintain live_in information if the live range of a register referred
2863    to in FROM is shrunk.
2864 
2865    Return 0 if register pressure doesn't change, otherwise return
2866    the number by which register pressure is decreased.
2867 
2868    NOTE: Register pressure won't be increased in this function.  */
2869 
2870 static int
2871 update_bb_reg_pressure (basic_block bb, rtx from)
2872 {
2873   rtx dreg, insn;
2874   basic_block succ_bb;
2875   df_ref *op, op_ref;
2876   edge succ;
2877   edge_iterator ei;
2878   int decreased_pressure = 0;
2879   int nregs;
2880   enum reg_class pressure_class;
2881 
2882   for (op = DF_INSN_USES (from); *op; op++)
2883     {
2884       dreg = DF_REF_REAL_REG (*op);
2885       /* The live range of the register is shrunk only if it is neither
2886 	 1. referred to on any path from the end of this block to EXIT, nor
2887 	 2. referred to by insns other than FROM in this block.  */
2888       FOR_EACH_EDGE (succ, ei, bb->succs)
2889 	{
2890 	  succ_bb = succ->dest;
2891 	  if (succ_bb == EXIT_BLOCK_PTR)
2892 	    continue;
2893 
2894 	  if (bitmap_bit_p (BB_DATA (succ_bb)->live_in, REGNO (dreg)))
2895 	    break;
2896 	}
2897       if (succ != NULL)
2898 	continue;
2899 
2900       op_ref = DF_REG_USE_CHAIN (REGNO (dreg));
2901       for (; op_ref; op_ref = DF_REF_NEXT_REG (op_ref))
2902 	{
2903 	  if (!DF_REF_INSN_INFO (op_ref))
2904 	    continue;
2905 
2906 	  insn = DF_REF_INSN (op_ref);
2907 	  if (BLOCK_FOR_INSN (insn) == bb
2908 	      && NONDEBUG_INSN_P (insn) && insn != from)
2909 	    break;
2910 	}
2911 
2912       pressure_class = get_regno_pressure_class (REGNO (dreg), &nregs);
2913       /* Decrease register pressure and update live_in information for
2914 	 this block.  */
2915       if (!op_ref && pressure_class != NO_REGS)
2916 	{
2917 	  decreased_pressure += nregs;
2918 	  BB_DATA (bb)->max_reg_pressure[pressure_class] -= nregs;
2919 	  bitmap_clear_bit (BB_DATA (bb)->live_in, REGNO (dreg));
2920 	}
2921     }
2922   return decreased_pressure;
2923 }
2924 
2925 /* Determine if the expression EXPR should be hoisted to EXPR_BB up in the
2926    flow graph, if it can reach BB unimpaired.  Stop the search if the
2927    expression would need to be moved more than DISTANCE instructions.
2928 
2929    DISTANCE is the number of instructions through which EXPR can be
2930    hoisted up in flow graph.
2931 
2932    BB_SIZE points to an array which contains the number of instructions
2933    for each basic block.
2934 
2935    PRESSURE_CLASS and NREGS are register class and number of hard registers
2936    for storing EXPR.
2937 
2938    HOISTED_BBS points to a bitmap indicating basic blocks through which
2939    EXPR is hoisted.
2940 
2941    FROM is the instruction from which EXPR is hoisted.
2942 
2943    It's unclear exactly what Muchnick meant by "unimpaired".  It seems
2944    to me that the expression must either be computed or transparent in
2945    *every* block in the path(s) from EXPR_BB to BB.  Any other definition
2946    would allow the expression to be hoisted out of loops, even if
2947    the expression wasn't a loop invariant.
2948 
2949    Contrast this to reachability for PRE where an expression is
2950    considered reachable if *any* path reaches instead of *all*
2951    paths.  */
2952 
2953 static int
2954 should_hoist_expr_to_dom (basic_block expr_bb, struct expr *expr,
2955 			  basic_block bb, sbitmap visited, int distance,
2956 			  int *bb_size, enum reg_class pressure_class,
2957 			  int *nregs, bitmap hoisted_bbs, rtx from)
2958 {
2959   unsigned int i;
2960   edge pred;
2961   edge_iterator ei;
2962   sbitmap_iterator sbi;
2963   int visited_allocated_locally = 0;
2964   int decreased_pressure = 0;
2965 
2966   if (flag_ira_hoist_pressure)
2967     {
2968       /* Record the old information of basic block BB when it is visited
2969 	 for the first time.  */
2970       if (!bitmap_bit_p (hoisted_bbs, bb->index))
2971 	{
2972 	  struct bb_data *data = BB_DATA (bb);
2973 	  bitmap_copy (data->backup, data->live_in);
2974 	  data->old_pressure = data->max_reg_pressure[pressure_class];
2975 	}
2976       decreased_pressure = update_bb_reg_pressure (bb, from);
2977     }
2978   /* Terminate the search if the distance through which EXPR is allowed
2979      to move is exhausted.  */
2980   if (distance > 0)
2981     {
2982       if (flag_ira_hoist_pressure)
2983 	{
2984 	  /* Prefer to hoist EXPR if register pressure is decreased.  */
2985 	  if (decreased_pressure > *nregs)
2986 	    distance += bb_size[bb->index];
2987 	  /* Let EXPR be hoisted through the basic block at no cost if one
2988 	     of the following conditions is satisfied:
2989 
2990 	     1. The basic block has low register pressure.
2991 	     2. Register pressure won't be increased after hoisting EXPR.
2992 
2993 	     Constant expressions are handled conservatively, because
2994 	     hoisting constant expressions aggressively results in worse
2995 	     code.  This decision is based on observations of CSiBE on the
2996 	     ARM target; it has no obvious effect on other targets like
2997 	     x86, x86_64, mips and powerpc.  */
2998 	  else if (CONST_INT_P (expr->expr)
2999 		   || (BB_DATA (bb)->max_reg_pressure[pressure_class]
3000 			 >= ira_class_hard_regs_num[pressure_class]
3001 		       && decreased_pressure < *nregs))
3002 	    distance -= bb_size[bb->index];
3003 	}
3004       else
3005 	distance -= bb_size[bb->index];
3006 
3007       if (distance <= 0)
3008 	return 0;
3009     }
3010   else
3011     gcc_assert (distance == 0);
3012 
3013   if (visited == NULL)
3014     {
3015       visited_allocated_locally = 1;
3016       visited = sbitmap_alloc (last_basic_block);
3017       bitmap_clear (visited);
3018     }
3019 
3020   FOR_EACH_EDGE (pred, ei, bb->preds)
3021     {
3022       basic_block pred_bb = pred->src;
3023 
3024       if (pred->src == ENTRY_BLOCK_PTR)
3025 	break;
3026       else if (pred_bb == expr_bb)
3027 	continue;
3028       else if (bitmap_bit_p (visited, pred_bb->index))
3029 	continue;
3030       else if (! bitmap_bit_p (transp[pred_bb->index], expr->bitmap_index))
3031 	break;
3032       /* Not killed.  */
3033       else
3034 	{
3035 	  bitmap_set_bit (visited, pred_bb->index);
3036 	  if (! should_hoist_expr_to_dom (expr_bb, expr, pred_bb,
3037 					  visited, distance, bb_size,
3038 					  pressure_class, nregs,
3039 					  hoisted_bbs, from))
3040 	    break;
3041 	}
3042     }
3043   if (visited_allocated_locally)
3044     {
3045       /* If EXPR can be hoisted to expr_bb, record basic blocks through
3046 	 which EXPR is hoisted in hoisted_bbs.  */
3047       if (flag_ira_hoist_pressure && !pred)
3048 	{
3049 	  /* Record the basic block from which EXPR is hoisted.  */
3050 	  bitmap_set_bit (visited, bb->index);
3051 	  EXECUTE_IF_SET_IN_BITMAP (visited, 0, i, sbi)
3052 	    bitmap_set_bit (hoisted_bbs, i);
3053 	}
3054       sbitmap_free (visited);
3055     }
3056 
3057   return (pred == NULL);
3058 }
3059 
3060 /* Find occurrence in BB.  */
3061 
3062 static struct occr *
3063 find_occr_in_bb (struct occr *occr, basic_block bb)
3064 {
3065   /* Find the right occurrence of this expression.  */
3066   while (occr && BLOCK_FOR_INSN (occr->insn) != bb)
3067     occr = occr->next;
3068 
3069   return occr;
3070 }
3071 
3072 /* Actually perform code hoisting.
3073 
3074    The code hoisting pass can hoist multiple computations of the same
3075    expression along dominated paths to a dominating basic block, like
3076    from b2/b3 to b1 as depicted below:
3077 
3078           b1      ------
3079           /\         |
3080          /  \        |
3081         bx   by   distance
3082        /      \      |
3083       /        \     |
3084      b2        b3 ------
3085 
3086    Unfortunately code hoisting generally extends the live range of an
3087    output pseudo register, which increases register pressure and hurts
3088    register allocation.  To address this issue, an attribute MAX_DISTANCE
3089    is computed and attached to each expression.  The attribute is computed
3090    from the rtx cost of the corresponding expression and is used to control
3091    how far the expression can be hoisted up in the flow graph.  As the
3092    expression is hoisted up in the flow graph, GCC decreases its DISTANCE
3093    and stops the hoist if DISTANCE reaches 0.  Code hoisting can decrease
3094    register pressure if live ranges of inputs are shrunk.
3095 
3096    Option "-fira-hoist-pressure" implements register-pressure-directed
3097    hoisting on top of the scheme above.  The rationale is:
3098      1. Calculate register pressure for each basic block by reusing IRA
3099 	facility.
3100      2. When expression is hoisted through one basic block, GCC checks
3101 	the change of live ranges for inputs/output.  The basic block's
3102 	register pressure will be increased because of extended live
3103 	range of output.  However, register pressure will be decreased
3104 	if the live ranges of inputs are shrunk.
3105      3. After knowing how hoisting affects register pressure, GCC prefers
3106 	to hoist the expression if it can decrease register pressure, by
3107 	increasing DISTANCE of the corresponding expression.
3108      4. If hoisting the expression increases register pressure, GCC checks
3109 	register pressure of the basic block and decrease DISTANCE only if
3110 	the register pressure is high.  In other words, expression will be
3111 	hoisted through at no cost if the basic block has low register
3112 	pressure.
3113      5. Update register pressure information for basic blocks through
3114 	which expression is hoisted.  */
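/* A sketch of the distance bookkeeping performed in should_hoist_expr_to_dom
   (a restatement, not new behavior): DISTANCE starts from the expression's
   max_distance and is reduced by a block's size in non-debug insns for each
   block the expression is hoisted through; with -fira-hoist-pressure a block
   costs nothing, or even adds distance back, when hoisting through it does
   not increase (or actually decreases) register pressure.  Hoisting stops
   once DISTANCE drops to 0.  */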
3115 
3116 static int
3117 hoist_code (void)
3118 {
3119   basic_block bb, dominated;
3120   vec<basic_block> dom_tree_walk;
3121   unsigned int dom_tree_walk_index;
3122   vec<basic_block> domby;
3123   unsigned int i, j, k;
3124   struct expr **index_map;
3125   struct expr *expr;
3126   int *to_bb_head;
3127   int *bb_size;
3128   int changed = 0;
3129   struct bb_data *data;
3130   /* Basic blocks that have occurrences reachable from BB.  */
3131   bitmap from_bbs;
3132   /* Basic blocks through which expr is hoisted.  */
3133   bitmap hoisted_bbs = NULL;
3134   bitmap_iterator bi;
3135 
3136   /* Compute a mapping from expression number (`bitmap_index') to
3137      hash table entry.  */
3138 
3139   index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
3140   for (i = 0; i < expr_hash_table.size; i++)
3141     for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
3142       index_map[expr->bitmap_index] = expr;
3143 
  /* Calculate the size of each basic block and note how far
     each instruction is from the start of its block.  We then use this
     data to restrict the distance an expression can travel.  */
3147 
3148   to_bb_head = XCNEWVEC (int, get_max_uid ());
3149   bb_size = XCNEWVEC (int, last_basic_block);
3150 
3151   FOR_EACH_BB (bb)
3152     {
3153       rtx insn;
3154       int to_head;
3155 
3156       to_head = 0;
3157       FOR_BB_INSNS (bb, insn)
3158 	{
3159 	  /* Don't count debug instructions to avoid them affecting
3160 	     decision choices.  */
3161 	  if (NONDEBUG_INSN_P (insn))
3162 	    to_bb_head[INSN_UID (insn)] = to_head++;
3163 	}
3164 
3165       bb_size[bb->index] = to_head;
3166     }
3167 
3168   gcc_assert (EDGE_COUNT (ENTRY_BLOCK_PTR->succs) == 1
3169 	      && (EDGE_SUCC (ENTRY_BLOCK_PTR, 0)->dest
3170 		  == ENTRY_BLOCK_PTR->next_bb));
3171 
3172   from_bbs = BITMAP_ALLOC (NULL);
3173   if (flag_ira_hoist_pressure)
3174     hoisted_bbs = BITMAP_ALLOC (NULL);
3175 
3176   dom_tree_walk = get_all_dominated_blocks (CDI_DOMINATORS,
3177 					    ENTRY_BLOCK_PTR->next_bb);
3178 
  /* Walk over each basic block looking for potentially hoistable
     expressions; nothing gets hoisted from the entry block.  */
3181   FOR_EACH_VEC_ELT (dom_tree_walk, dom_tree_walk_index, bb)
3182     {
3183       domby = get_dominated_to_depth (CDI_DOMINATORS, bb, MAX_HOIST_DEPTH);
3184 
3185       if (domby.length () == 0)
3186 	continue;
3187 
3188       /* Examine each expression that is very busy at the exit of this
3189 	 block.  These are the potentially hoistable expressions.  */
3190       for (i = 0; i < SBITMAP_SIZE (hoist_vbeout[bb->index]); i++)
3191 	{
3192 	  if (bitmap_bit_p (hoist_vbeout[bb->index], i))
3193 	    {
3194 	      int nregs = 0;
3195 	      enum reg_class pressure_class = NO_REGS;
3196 	      /* Current expression.  */
3197 	      struct expr *expr = index_map[i];
3198 	      /* Number of occurrences of EXPR that can be hoisted to BB.  */
3199 	      int hoistable = 0;
3200 	      /* Occurrences reachable from BB.  */
3201 	      vec<occr_t> occrs_to_hoist = vNULL;
3202 	      /* We want to insert the expression into BB only once, so
3203 		 note when we've inserted it.  */
3204 	      int insn_inserted_p;
3205 	      occr_t occr;
3206 
3207 	      /* If an expression is computed in BB and is available at end of
3208 		 BB, hoist all occurrences dominated by BB to BB.  */
3209 	      if (bitmap_bit_p (comp[bb->index], i))
3210 		{
3211 		  occr = find_occr_in_bb (expr->antic_occr, bb);
3212 
3213 		  if (occr)
3214 		    {
3215 		      /* An occurrence might've been already deleted
3216 			 while processing a dominator of BB.  */
3217 		      if (!occr->deleted_p)
3218 			{
3219 			  gcc_assert (NONDEBUG_INSN_P (occr->insn));
3220 			  hoistable++;
3221 			}
3222 		    }
3223 		  else
3224 		    hoistable++;
3225 		}
3226 
3227 	      /* We've found a potentially hoistable expression, now
3228 		 we look at every block BB dominates to see if it
3229 		 computes the expression.  */
3230 	      FOR_EACH_VEC_ELT (domby, j, dominated)
3231 		{
3232 		  int max_distance;
3233 
3234 		  /* Ignore self dominance.  */
3235 		  if (bb == dominated)
3236 		    continue;
3237 		  /* We've found a dominated block, now see if it computes
3238 		     the busy expression and whether or not moving that
3239 		     expression to the "beginning" of that block is safe.  */
3240 		  if (!bitmap_bit_p (antloc[dominated->index], i))
3241 		    continue;
3242 
3243 		  occr = find_occr_in_bb (expr->antic_occr, dominated);
3244 		  gcc_assert (occr);
3245 
3246 		  /* An occurrence might've been already deleted
3247 		     while processing a dominator of BB.  */
3248 		  if (occr->deleted_p)
3249 		    continue;
3250 		  gcc_assert (NONDEBUG_INSN_P (occr->insn));
3251 
3252 		  max_distance = expr->max_distance;
3253 		  if (max_distance > 0)
3254 		    /* Adjust MAX_DISTANCE to account for the fact that
3255 		       OCCR won't have to travel all of DOMINATED, but
3256 		       only part of it.  */
3257 		    max_distance += (bb_size[dominated->index]
3258 				     - to_bb_head[INSN_UID (occr->insn)]);
3259 
3260 		  pressure_class = get_pressure_class_and_nregs (occr->insn,
3261 								 &nregs);
3262 
		  /* Note if the expression should be hoisted from the dominated
		     block to BB if it can reach DOMINATED unimpaired.
3265 
3266 		     Keep track of how many times this expression is hoistable
3267 		     from a dominated block into BB.  */
3268 		  if (should_hoist_expr_to_dom (bb, expr, dominated, NULL,
3269 						max_distance, bb_size,
3270 						pressure_class,	&nregs,
3271 						hoisted_bbs, occr->insn))
3272 		    {
3273 		      hoistable++;
3274 		      occrs_to_hoist.safe_push (occr);
3275 		      bitmap_set_bit (from_bbs, dominated->index);
3276 		    }
3277 		}
3278 
3279 	      /* If we found more than one hoistable occurrence of this
3280 		 expression, then note it in the vector of expressions to
3281 		 hoist.  It makes no sense to hoist things which are computed
3282 		 in only one BB, and doing so tends to pessimize register
3283 		 allocation.  One could increase this value to try harder
3284 		 to avoid any possible code expansion due to register
3285 		 allocation issues; however experiments have shown that
3286 		 the vast majority of hoistable expressions are only movable
3287 		 from two successors, so raising this threshold is likely
3288 		 to nullify any benefit we get from code hoisting.  */
3289 	      if (hoistable > 1 && dbg_cnt (hoist_insn))
3290 		{
3291 		  /* If (hoistable != vec::length), then there is
3292 		     an occurrence of EXPR in BB itself.  Don't waste
3293 		     time looking for LCA in this case.  */
3294 		  if ((unsigned) hoistable == occrs_to_hoist.length ())
3295 		    {
3296 		      basic_block lca;
3297 
3298 		      lca = nearest_common_dominator_for_set (CDI_DOMINATORS,
3299 							      from_bbs);
3300 		      if (lca != bb)
3301 			/* Punt, it's better to hoist these occurrences to
3302 			   LCA.  */
3303 			occrs_to_hoist.release ();
3304 		    }
3305 		}
3306 	      else
		/* Punt, no point hoisting a single occurrence.  */
3308 		occrs_to_hoist.release ();
3309 
3310 	      if (flag_ira_hoist_pressure
3311 		  && !occrs_to_hoist.is_empty ())
3312 		{
3313 		  /* Increase register pressure of basic blocks to which
3314 		     expr is hoisted because of extended live range of
3315 		     output.  */
3316 		  data = BB_DATA (bb);
3317 		  data->max_reg_pressure[pressure_class] += nregs;
3318 		  EXECUTE_IF_SET_IN_BITMAP (hoisted_bbs, 0, k, bi)
3319 		    {
3320 		      data = BB_DATA (BASIC_BLOCK (k));
3321 		      data->max_reg_pressure[pressure_class] += nregs;
3322 		    }
3323 		}
3324 	      else if (flag_ira_hoist_pressure)
3325 		{
3326 		  /* Restore register pressure and live_in info for basic
3327 		     blocks recorded in hoisted_bbs when expr will not be
3328 		     hoisted.  */
3329 		  EXECUTE_IF_SET_IN_BITMAP (hoisted_bbs, 0, k, bi)
3330 		    {
3331 		      data = BB_DATA (BASIC_BLOCK (k));
3332 		      bitmap_copy (data->live_in, data->backup);
3333 		      data->max_reg_pressure[pressure_class]
3334 			  = data->old_pressure;
3335 		    }
3336 		}
3337 
3338 	      if (flag_ira_hoist_pressure)
3339 		bitmap_clear (hoisted_bbs);
3340 
3341 	      insn_inserted_p = 0;
3342 
	      /* Walk through the occurrences of the I'th expression we want
		 to hoist to BB and make the transformations.  */
3345 	      FOR_EACH_VEC_ELT (occrs_to_hoist, j, occr)
3346 		{
3347 		  rtx insn;
3348 		  rtx set;
3349 
3350 		  gcc_assert (!occr->deleted_p);
3351 
3352 		  insn = occr->insn;
3353 		  set = single_set (insn);
3354 		  gcc_assert (set);
3355 
3356 		  /* Create a pseudo-reg to store the result of reaching
3357 		     expressions into.  Get the mode for the new pseudo
3358 		     from the mode of the original destination pseudo.
3359 
3360 		     It is important to use new pseudos whenever we
3361 		     emit a set.  This will allow reload to use
3362 		     rematerialization for such registers.  */
3363 		  if (!insn_inserted_p)
3364 		    expr->reaching_reg
3365 		      = gen_reg_rtx_and_attrs (SET_DEST (set));
3366 
3367 		  gcse_emit_move_after (SET_DEST (set), expr->reaching_reg,
3368 					insn);
3369 		  delete_insn (insn);
3370 		  occr->deleted_p = 1;
3371 		  changed = 1;
3372 		  gcse_subst_count++;
3373 
3374 		  if (!insn_inserted_p)
3375 		    {
3376 		      insert_insn_end_basic_block (expr, bb);
3377 		      insn_inserted_p = 1;
3378 		    }
3379 		}
3380 
3381 	      occrs_to_hoist.release ();
3382 	      bitmap_clear (from_bbs);
3383 	    }
3384 	}
3385       domby.release ();
3386     }
3387 
3388   dom_tree_walk.release ();
3389   BITMAP_FREE (from_bbs);
3390   if (flag_ira_hoist_pressure)
3391     BITMAP_FREE (hoisted_bbs);
3392 
3393   free (bb_size);
3394   free (to_bb_head);
3395   free (index_map);
3396 
3397   return changed;
3398 }
3399 
3400 /* Return pressure class and number of needed hard registers (through
3401    *NREGS) of register REGNO.  */
3402 static enum reg_class
get_regno_pressure_class (int regno, int *nregs)
3404 {
3405   if (regno >= FIRST_PSEUDO_REGISTER)
3406     {
3407       enum reg_class pressure_class;
3408 
3409       pressure_class = reg_allocno_class (regno);
3410       pressure_class = ira_pressure_class_translate[pressure_class];
3411       *nregs
3412 	= ira_reg_class_max_nregs[pressure_class][PSEUDO_REGNO_MODE (regno)];
3413       return pressure_class;
3414     }
3415   else if (! TEST_HARD_REG_BIT (ira_no_alloc_regs, regno)
3416 	   && ! TEST_HARD_REG_BIT (eliminable_regset, regno))
3417     {
3418       *nregs = 1;
3419       return ira_pressure_class_translate[REGNO_REG_CLASS (regno)];
3420     }
3421   else
3422     {
3423       *nregs = 0;
3424       return NO_REGS;
3425     }
3426 }
3427 
3428 /* Return pressure class and number of hard registers (through *NREGS)
3429    for destination of INSN. */
3430 static enum reg_class
get_pressure_class_and_nregs (rtx insn, int *nregs)
3432 {
3433   rtx reg;
3434   enum reg_class pressure_class;
3435   rtx set = single_set (insn);
3436 
3437   /* Considered invariant insns have only one set.  */
3438   gcc_assert (set != NULL_RTX);
3439   reg = SET_DEST (set);
3440   if (GET_CODE (reg) == SUBREG)
3441     reg = SUBREG_REG (reg);
3442   if (MEM_P (reg))
3443     {
3444       *nregs = 0;
3445       pressure_class = NO_REGS;
3446     }
3447   else
3448     {
3449       gcc_assert (REG_P (reg));
3450       pressure_class = reg_allocno_class (REGNO (reg));
3451       pressure_class = ira_pressure_class_translate[pressure_class];
3452       *nregs
3453 	= ira_reg_class_max_nregs[pressure_class][GET_MODE (SET_SRC (set))];
3454     }
3455   return pressure_class;
3456 }
3457 
3458 /* Increase (if INCR_P) or decrease current register pressure for
3459    register REGNO.  */
3460 static void
change_pressure (int regno, bool incr_p)
3462 {
3463   int nregs;
3464   enum reg_class pressure_class;
3465 
3466   pressure_class = get_regno_pressure_class (regno, &nregs);
3467   if (! incr_p)
3468     curr_reg_pressure[pressure_class] -= nregs;
3469   else
3470     {
3471       curr_reg_pressure[pressure_class] += nregs;
3472       if (BB_DATA (curr_bb)->max_reg_pressure[pressure_class]
3473 	  < curr_reg_pressure[pressure_class])
3474 	BB_DATA (curr_bb)->max_reg_pressure[pressure_class]
3475 	  = curr_reg_pressure[pressure_class];
3476     }
3477 }
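
/* For illustration only (hypothetical pseudo numbers, assuming a single
   GENERAL_REGS pressure class needing one hard register per pseudo):
   for a block containing

	r61 = r63 * 2
	r60 = r61 + r62

   with live-out = {r60}, the reverse walk in calculate_bb_reg_pressure
   below starts with pressure 1 and processes the last insn first:

	r60 = r61 + r62:   kill r60 -> 0,  gen r61 -> 1,  gen r62 -> 2
	r61 = r63 * 2:     kill r61 -> 1,  gen r63 -> 2

   so max_reg_pressure for the block is recorded as 2.  */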
3478 
3479 /* Calculate register pressure for each basic block by walking insns
3480    from last to first.  */
3481 static void
calculate_bb_reg_pressure (void)
3483 {
3484   int i;
3485   unsigned int j;
3486   rtx insn;
3487   basic_block bb;
3488   bitmap curr_regs_live;
3489   bitmap_iterator bi;
3490 
3491 
3492   ira_setup_eliminable_regset (false);
3493   curr_regs_live = BITMAP_ALLOC (&reg_obstack);
3494   FOR_EACH_BB (bb)
3495     {
3496       curr_bb = bb;
3497       BB_DATA (bb)->live_in = BITMAP_ALLOC (NULL);
3498       BB_DATA (bb)->backup = BITMAP_ALLOC (NULL);
3499       bitmap_copy (BB_DATA (bb)->live_in, df_get_live_in (bb));
3500       bitmap_copy (curr_regs_live, df_get_live_out (bb));
3501       for (i = 0; i < ira_pressure_classes_num; i++)
3502 	curr_reg_pressure[ira_pressure_classes[i]] = 0;
3503       EXECUTE_IF_SET_IN_BITMAP (curr_regs_live, 0, j, bi)
3504 	change_pressure (j, true);
3505 
3506       FOR_BB_INSNS_REVERSE (bb, insn)
3507 	{
3508 	  rtx dreg;
3509 	  int regno;
3510 	  df_ref *def_rec, *use_rec;
3511 
3512 	  if (! NONDEBUG_INSN_P (insn))
3513 	    continue;
3514 
3515 	  for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
3516 	    {
3517 	      dreg = DF_REF_REAL_REG (*def_rec);
3518 	      gcc_assert (REG_P (dreg));
3519 	      regno = REGNO (dreg);
3520 	      if (!(DF_REF_FLAGS (*def_rec)
3521 		    & (DF_REF_PARTIAL | DF_REF_CONDITIONAL)))
3522 		{
3523 		  if (bitmap_clear_bit (curr_regs_live, regno))
3524 		    change_pressure (regno, false);
3525 		}
3526 	    }
3527 
3528 	  for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
3529 	    {
3530 	      dreg = DF_REF_REAL_REG (*use_rec);
3531 	      gcc_assert (REG_P (dreg));
3532 	      regno = REGNO (dreg);
3533 	      if (bitmap_set_bit (curr_regs_live, regno))
3534 		change_pressure (regno, true);
3535 	    }
3536 	}
3537     }
3538   BITMAP_FREE (curr_regs_live);
3539 
3540   if (dump_file == NULL)
3541     return;
3542 
3543   fprintf (dump_file, "\nRegister Pressure: \n");
3544   FOR_EACH_BB (bb)
3545     {
3546       fprintf (dump_file, "  Basic block %d: \n", bb->index);
3547       for (i = 0; (int) i < ira_pressure_classes_num; i++)
3548 	{
3549 	  enum reg_class pressure_class;
3550 
3551 	  pressure_class = ira_pressure_classes[i];
3552 	  if (BB_DATA (bb)->max_reg_pressure[pressure_class] == 0)
3553 	    continue;
3554 
3555 	  fprintf (dump_file, "    %s=%d\n", reg_class_names[pressure_class],
3556 		   BB_DATA (bb)->max_reg_pressure[pressure_class]);
3557 	}
3558     }
3559   fprintf (dump_file, "\n");
3560 }
3561 
/* Top level routine to perform one code hoisting (aka unification) pass.
3563 
3564    Return nonzero if a change was made.  */
3565 
3566 static int
one_code_hoisting_pass (void)
3568 {
3569   int changed = 0;
3570 
3571   gcse_subst_count = 0;
3572   gcse_create_count = 0;
3573 
3574   /* Return if there's nothing to do, or it is too expensive.  */
3575   if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
3576       || is_too_expensive (_("GCSE disabled")))
3577     return 0;
3578 
3579   doing_code_hoisting_p = true;
3580 
3581   /* Calculate register pressure for each basic block.  */
3582   if (flag_ira_hoist_pressure)
3583     {
3584       regstat_init_n_sets_and_refs ();
3585       ira_set_pseudo_classes (false, dump_file);
3586       alloc_aux_for_blocks (sizeof (struct bb_data));
3587       calculate_bb_reg_pressure ();
3588       regstat_free_n_sets_and_refs ();
3589     }
3590 
3591   /* We need alias.  */
3592   init_alias_analysis ();
3593 
3594   bytes_used = 0;
3595   gcc_obstack_init (&gcse_obstack);
3596   alloc_gcse_mem ();
3597 
3598   alloc_hash_table (&expr_hash_table);
3599   compute_hash_table (&expr_hash_table);
3600   if (dump_file)
    dump_hash_table (dump_file, "Code Hoisting Expressions", &expr_hash_table);
3602 
3603   if (expr_hash_table.n_elems > 0)
3604     {
3605       alloc_code_hoist_mem (last_basic_block, expr_hash_table.n_elems);
3606       compute_code_hoist_data ();
3607       changed = hoist_code ();
3608       free_code_hoist_mem ();
3609     }
3610 
3611   if (flag_ira_hoist_pressure)
3612     {
3613       free_aux_for_blocks ();
3614       free_reg_info ();
3615     }
3616   free_hash_table (&expr_hash_table);
3617   free_gcse_mem ();
3618   obstack_free (&gcse_obstack, NULL);
3619 
3620   /* We are finished with alias.  */
3621   end_alias_analysis ();
3622 
3623   if (dump_file)
3624     {
3625       fprintf (dump_file, "HOIST of %s, %d basic blocks, %d bytes needed, ",
3626 	       current_function_name (), n_basic_blocks, bytes_used);
3627       fprintf (dump_file, "%d substs, %d insns created\n",
3628 	       gcse_subst_count, gcse_create_count);
3629     }
3630 
3631   doing_code_hoisting_p = false;
3632 
3633   return changed;
3634 }
3635 
3636 /*  Here we provide the things required to do store motion towards the exit.
3637     In order for this to be effective, gcse also needed to be taught how to
3638     move a load when it is killed only by a store to itself.
3639 
3640 	    int i;
3641 	    float a[10];
3642 
3643 	    void foo(float scale)
3644 	    {
3645 	      for (i=0; i<10; i++)
3646 		a[i] *= scale;
3647 	    }
3648 
3649     'i' is both loaded and stored to in the loop. Normally, gcse cannot move
    the load out since it's live around the loop and is stored at the bottom
    of the loop.
3652 
      The 'Load Motion' referred to and implemented in this file is
    an enhancement to gcse which, when using edge-based LCM, recognizes
    this situation and allows gcse to move the load out of the loop.
3656 
3657       Once gcse has hoisted the load, store motion can then push this
3658     load towards the exit, and we end up with no loads or stores of 'i'
3659     in the loop.  */
3660 
3661 static hashval_t
pre_ldst_expr_hash (const void *p)
3663 {
3664   int do_not_record_p = 0;
3665   const struct ls_expr *const x = (const struct ls_expr *) p;
3666   return
3667     hash_rtx (x->pattern, GET_MODE (x->pattern), &do_not_record_p, NULL, false);
3668 }
3669 
3670 static int
pre_ldst_expr_eq (const void *p1, const void *p2)
3672 {
3673   const struct ls_expr *const ptr1 = (const struct ls_expr *) p1,
3674     *const ptr2 = (const struct ls_expr *) p2;
3675   return expr_equiv_p (ptr1->pattern, ptr2->pattern);
3676 }
3677 
3678 /* This will search the ldst list for a matching expression. If it
3679    doesn't find one, we create one and initialize it.  */
3680 
3681 static struct ls_expr *
ldst_entry (rtx x)
3683 {
3684   int do_not_record_p = 0;
3685   struct ls_expr * ptr;
3686   unsigned int hash;
3687   void **slot;
3688   struct ls_expr e;
3689 
3690   hash = hash_rtx (x, GET_MODE (x), &do_not_record_p,
3691 		   NULL,  /*have_reg_qty=*/false);
3692 
3693   e.pattern = x;
3694   slot = htab_find_slot_with_hash (pre_ldst_table, &e, hash, INSERT);
3695   if (*slot)
3696     return (struct ls_expr *)*slot;
3697 
3698   ptr = XNEW (struct ls_expr);
3699 
3700   ptr->next         = pre_ldst_mems;
3701   ptr->expr         = NULL;
3702   ptr->pattern      = x;
3703   ptr->pattern_regs = NULL_RTX;
3704   ptr->loads        = NULL_RTX;
3705   ptr->stores       = NULL_RTX;
3706   ptr->reaching_reg = NULL_RTX;
3707   ptr->invalid      = 0;
3708   ptr->index        = 0;
3709   ptr->hash_index   = hash;
3710   pre_ldst_mems     = ptr;
3711   *slot = ptr;
3712 
3713   return ptr;
3714 }
3715 
3716 /* Free up an individual ldst entry.  */
3717 
3718 static void
free_ldst_entry (struct ls_expr * ptr)
3720 {
3721   free_INSN_LIST_list (& ptr->loads);
3722   free_INSN_LIST_list (& ptr->stores);
3723 
3724   free (ptr);
3725 }
3726 
3727 /* Free up all memory associated with the ldst list.  */
3728 
3729 static void
free_ld_motion_mems (void)
3731 {
3732   if (pre_ldst_table)
3733     htab_delete (pre_ldst_table);
3734   pre_ldst_table = NULL;
3735 
3736   while (pre_ldst_mems)
3737     {
3738       struct ls_expr * tmp = pre_ldst_mems;
3739 
3740       pre_ldst_mems = pre_ldst_mems->next;
3741 
3742       free_ldst_entry (tmp);
3743     }
3744 
3745   pre_ldst_mems = NULL;
3746 }
3747 
3748 /* Dump debugging info about the ldst list.  */
3749 
3750 static void
print_ldst_list (FILE * file)
3752 {
3753   struct ls_expr * ptr;
3754 
3755   fprintf (file, "LDST list: \n");
3756 
3757   for (ptr = pre_ldst_mems; ptr != NULL; ptr = ptr->next)
3758     {
3759       fprintf (file, "  Pattern (%3d): ", ptr->index);
3760 
3761       print_rtl (file, ptr->pattern);
3762 
3763       fprintf (file, "\n	 Loads : ");
3764 
3765       if (ptr->loads)
3766 	print_rtl (file, ptr->loads);
3767       else
3768 	fprintf (file, "(nil)");
3769 
3770       fprintf (file, "\n	Stores : ");
3771 
3772       if (ptr->stores)
3773 	print_rtl (file, ptr->stores);
3774       else
3775 	fprintf (file, "(nil)");
3776 
3777       fprintf (file, "\n\n");
3778     }
3779 
3780   fprintf (file, "\n");
3781 }
3782 
/* Return the ldst list entry for X if X is in the list of ldst only
   expressions and the entry is valid, otherwise return NULL.  */
3784 
3785 static struct ls_expr *
find_rtx_in_ldst (rtx x)
3787 {
3788   struct ls_expr e;
3789   void **slot;
3790   if (!pre_ldst_table)
3791     return NULL;
3792   e.pattern = x;
3793   slot = htab_find_slot (pre_ldst_table, &e, NO_INSERT);
3794   if (!slot || ((struct ls_expr *)*slot)->invalid)
3795     return NULL;
3796   return (struct ls_expr *) *slot;
3797 }
3798 
3799 /* Load Motion for loads which only kill themselves.  */
3800 
/* Return true if X, a MEM, is a simple access with no side effects.
   These are the types of loads we consider for the ld_motion list;
   otherwise we let the usual aliasing take care of it.  */
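
/* For illustration only (schematic RTL, not taken from a real dump):

	(mem:SI (symbol_ref:SI ("a")))			typically accepted
	(mem/v:SI (symbol_ref:SI ("a")))		rejected: volatile
	(mem:BLK (symbol_ref:SI ("a")))			rejected: BLKmode
	(mem:SI (plus:SI (reg/f:SI sp) (const_int 4)))	rejected: mentions the
							stack pointer.  */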
3804 
3805 static int
simple_mem (const_rtx x)
3807 {
3808   if (MEM_VOLATILE_P (x))
3809     return 0;
3810 
3811   if (GET_MODE (x) == BLKmode)
3812     return 0;
3813 
3814   /* If we are handling exceptions, we must be careful with memory references
3815      that may trap.  If we are not, the behavior is undefined, so we may just
3816      continue.  */
3817   if (cfun->can_throw_non_call_exceptions && may_trap_p (x))
3818     return 0;
3819 
3820   if (side_effects_p (x))
3821     return 0;
3822 
3823   /* Do not consider function arguments passed on stack.  */
3824   if (reg_mentioned_p (stack_pointer_rtx, x))
3825     return 0;
3826 
3827   if (flag_float_store && FLOAT_MODE_P (GET_MODE (x)))
3828     return 0;
3829 
3830   return 1;
3831 }
3832 
/* Make sure there isn't a buried reference in this pattern anywhere.
   If there is, invalidate the entry for it since we're not capable
   of fixing it up just yet.  We have to be sure we know about ALL
   loads since the aliasing code will allow all entries in the
   ld_motion list to not-alias themselves.  If we miss a load, we will
   get the wrong value since gcse might common it and we won't know to
   fix it up.  */
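
/* For example (schematic RTL with a made-up pseudo number), in

	(set (reg:SI 60) (plus:SI (mem:SI (symbol_ref:SI ("a"))) (const_int 1)))

   the MEM of "a" is buried inside the PLUS, so the routine below creates
   (or finds) the ldst entry for it and marks that entry invalid.  */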
3840 
3841 static void
invalidate_any_buried_refs (rtx x)
3843 {
3844   const char * fmt;
3845   int i, j;
3846   struct ls_expr * ptr;
3847 
3848   /* Invalidate it in the list.  */
3849   if (MEM_P (x) && simple_mem (x))
3850     {
3851       ptr = ldst_entry (x);
3852       ptr->invalid = 1;
3853     }
3854 
3855   /* Recursively process the insn.  */
3856   fmt = GET_RTX_FORMAT (GET_CODE (x));
3857 
3858   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3859     {
3860       if (fmt[i] == 'e')
3861 	invalidate_any_buried_refs (XEXP (x, i));
3862       else if (fmt[i] == 'E')
3863 	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3864 	  invalidate_any_buried_refs (XVECEXP (x, i, j));
3865     }
3866 }
3867 
3868 /* Find all the 'simple' MEMs which are used in LOADs and STORES.  Simple
3869    being defined as MEM loads and stores to symbols, with no side effects
3870    and no registers in the expression.  For a MEM destination, we also
3871    check that the insn is still valid if we replace the destination with a
   REG, as is done in update_ld_motion_stores.  Any uses/defs which
   don't match these criteria are invalidated and trimmed out later.  */
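
/* For illustration only (schematic SETs with made-up pseudo numbers):

	(set (reg:SI 60) (mem:SI (symbol_ref:SI ("a"))))   recorded as a load
	(set (mem:SI (symbol_ref:SI ("a"))) (reg:SI 60))   recorded as a store

   while a MEM-to-MEM copy such as

	(set (mem:SI (symbol_ref:SI ("a"))) (mem:SI (symbol_ref:SI ("b"))))

   invalidates both entries: "a" because its source is a MEM, and "b"
   because its destination is not a REG.  */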
3875 
3876 static void
compute_ld_motion_mems (void)
3878 {
3879   struct ls_expr * ptr;
3880   basic_block bb;
3881   rtx insn;
3882 
3883   pre_ldst_mems = NULL;
3884   pre_ldst_table
3885     = htab_create (13, pre_ldst_expr_hash, pre_ldst_expr_eq, NULL);
3886 
3887   FOR_EACH_BB (bb)
3888     {
3889       FOR_BB_INSNS (bb, insn)
3890 	{
3891 	  if (NONDEBUG_INSN_P (insn))
3892 	    {
3893 	      if (GET_CODE (PATTERN (insn)) == SET)
3894 		{
3895 		  rtx src = SET_SRC (PATTERN (insn));
3896 		  rtx dest = SET_DEST (PATTERN (insn));
3897 
3898 		  /* Check for a simple LOAD...  */
3899 		  if (MEM_P (src) && simple_mem (src))
3900 		    {
3901 		      ptr = ldst_entry (src);
3902 		      if (REG_P (dest))
3903 			ptr->loads = alloc_INSN_LIST (insn, ptr->loads);
3904 		      else
3905 			ptr->invalid = 1;
3906 		    }
3907 		  else
3908 		    {
3909 		      /* Make sure there isn't a buried load somewhere.  */
3910 		      invalidate_any_buried_refs (src);
3911 		    }
3912 
		  /* Check for stores.  Don't worry about aliased ones, they
		     will block any movement we might do later.  We only care
		     about this exact pattern since it is the only
		     circumstance in which we will ignore the aliasing info.  */
3917 		  if (MEM_P (dest) && simple_mem (dest))
3918 		    {
3919 		      ptr = ldst_entry (dest);
3920 
3921 		      if (! MEM_P (src)
3922 			  && GET_CODE (src) != ASM_OPERANDS
3923 			  /* Check for REG manually since want_to_gcse_p
3924 			     returns 0 for all REGs.  */
3925 			  && can_assign_to_reg_without_clobbers_p (src))
3926 			ptr->stores = alloc_INSN_LIST (insn, ptr->stores);
3927 		      else
3928 			ptr->invalid = 1;
3929 		    }
3930 		}
3931 	      else
3932 		invalidate_any_buried_refs (PATTERN (insn));
3933 	    }
3934 	}
3935     }
3936 }
3937 
/* Remove any references that have been invalidated or are not in the
   expression list for pre gcse.  */
3940 
3941 static void
trim_ld_motion_mems (void)
3943 {
3944   struct ls_expr * * last = & pre_ldst_mems;
3945   struct ls_expr * ptr = pre_ldst_mems;
3946 
3947   while (ptr != NULL)
3948     {
3949       struct expr * expr;
3950 
3951       /* Delete if entry has been made invalid.  */
3952       if (! ptr->invalid)
3953 	{
3954 	  /* Delete if we cannot find this mem in the expression list.  */
3955 	  unsigned int hash = ptr->hash_index % expr_hash_table.size;
3956 
3957 	  for (expr = expr_hash_table.table[hash];
3958 	       expr != NULL;
3959 	       expr = expr->next_same_hash)
3960 	    if (expr_equiv_p (expr->expr, ptr->pattern))
3961 	      break;
3962 	}
3963       else
3964 	expr = (struct expr *) 0;
3965 
3966       if (expr)
3967 	{
3968 	  /* Set the expression field if we are keeping it.  */
3969 	  ptr->expr = expr;
3970 	  last = & ptr->next;
3971 	  ptr = ptr->next;
3972 	}
3973       else
3974 	{
3975 	  *last = ptr->next;
3976 	  htab_remove_elt_with_hash (pre_ldst_table, ptr, ptr->hash_index);
3977 	  free_ldst_entry (ptr);
3978 	  ptr = * last;
3979 	}
3980     }
3981 
3982   /* Show the world what we've found.  */
3983   if (dump_file && pre_ldst_mems != NULL)
3984     print_ldst_list (dump_file);
3985 }
3986 
3987 /* This routine will take an expression which we are replacing with
3988    a reaching register, and update any stores that are needed if
3989    that expression is in the ld_motion list.  Stores are updated by
3990    copying their SRC to the reaching register, and then storing
   the reaching register into the store location.  This keeps the
   correct value in the reaching register for the loads.  */
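
/* For illustration only (schematic RTL, made-up pseudo numbers): if
   EXPR->reaching_reg is (reg:SI 70), a recorded store

	(set (mem:SI (symbol_ref:SI ("i"))) (plus:SI (reg:SI 60) (const_int 1)))

   is rewritten as

	(set (reg:SI 70) (plus:SI (reg:SI 60) (const_int 1)))
	(set (mem:SI (symbol_ref:SI ("i"))) (reg:SI 70))

   so the loads that were rewritten to use EXPR->reaching_reg still see
   the stored value.  */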
3993 
3994 static void
update_ld_motion_stores (struct expr * expr)
3996 {
3997   struct ls_expr * mem_ptr;
3998 
3999   if ((mem_ptr = find_rtx_in_ldst (expr->expr)))
4000     {
      /* We could try to find just the REACHED stores, but it shouldn't
	 matter if we set the reaching reg everywhere...  some might be
	 dead and should be eliminated later.  */
4004 
4005       /* We replace (set mem expr) with (set reg expr) (set mem reg)
4006 	 where reg is the reaching reg used in the load.  We checked in
4007 	 compute_ld_motion_mems that we can replace (set mem expr) with
4008 	 (set reg expr) in that insn.  */
4009       rtx list = mem_ptr->stores;
4010 
4011       for ( ; list != NULL_RTX; list = XEXP (list, 1))
4012 	{
4013 	  rtx insn = XEXP (list, 0);
4014 	  rtx pat = PATTERN (insn);
4015 	  rtx src = SET_SRC (pat);
4016 	  rtx reg = expr->reaching_reg;
4017 	  rtx copy;
4018 
4019 	  /* If we've already copied it, continue.  */
4020 	  if (expr->reaching_reg == src)
4021 	    continue;
4022 
4023 	  if (dump_file)
4024 	    {
4025 	      fprintf (dump_file, "PRE:  store updated with reaching reg ");
4026 	      print_rtl (dump_file, reg);
4027 	      fprintf (dump_file, ":\n	");
4028 	      print_inline_rtx (dump_file, insn, 8);
4029 	      fprintf (dump_file, "\n");
4030 	    }
4031 
4032 	  copy = gen_move_insn (reg, copy_rtx (SET_SRC (pat)));
4033 	  emit_insn_before (copy, insn);
4034 	  SET_SRC (pat) = reg;
4035 	  df_insn_rescan (insn);
4036 
4037 	  /* un-recognize this pattern since it's probably different now.  */
4038 	  INSN_CODE (insn) = -1;
4039 	  gcse_create_count++;
4040 	}
4041     }
4042 }
4043 
4044 /* Return true if the graph is too expensive to optimize. PASS is the
4045    optimization about to be performed.  */
4046 
4047 static bool
is_too_expensive (const char *pass)
4049 {
4050   /* Trying to perform global optimizations on flow graphs which have
4051      a high connectivity will take a long time and is unlikely to be
4052      particularly useful.
4053 
     In normal circumstances a cfg should have about twice as many
     edges as blocks.  But we do not want to punish small functions
     which have a couple of switch statements.  Rather than simply
     thresholding the number of blocks, use something with a more
     graceful degradation.  */
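  /* For example, a function with 1000 basic blocks is rejected here only
     if it has more than 20000 + 4 * 1000 = 24000 edges, i.e. an average
     of about 24 edges per block.  */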
4059   if (n_edges > 20000 + n_basic_blocks * 4)
4060     {
4061       warning (OPT_Wdisabled_optimization,
4062 	       "%s: %d basic blocks and %d edges/basic block",
4063 	       pass, n_basic_blocks, n_edges / n_basic_blocks);
4064 
4065       return true;
4066     }
4067 
4068   /* If allocating memory for the dataflow bitmaps would take up too much
4069      storage it's better just to disable the optimization.  */
4070   if ((n_basic_blocks
4071        * SBITMAP_SET_SIZE (max_reg_num ())
4072        * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
4073     {
4074       warning (OPT_Wdisabled_optimization,
4075 	       "%s: %d basic blocks and %d registers",
4076 	       pass, n_basic_blocks, max_reg_num ());
4077 
4078       return true;
4079     }
4080 
4081   return false;
4082 }
4083 
4084 /* All the passes implemented in this file.  Each pass has its
4085    own gate and execute function, and at the end of the file a
4086    pass definition for passes.c.
4087 
4088    We do not construct an accurate cfg in functions which call
4089    setjmp, so none of these passes runs if the function calls
4090    setjmp.
4091    FIXME: Should just handle setjmp via REG_SETJMP notes.  */
4092 
4093 static bool
gate_rtl_pre (void)
4095 {
4096   return optimize > 0 && flag_gcse
4097     && !cfun->calls_setjmp
4098     && optimize_function_for_speed_p (cfun)
4099     && dbg_cnt (pre);
4100 }
4101 
4102 static unsigned int
execute_rtl_pre (void)
4104 {
4105   int changed;
4106   delete_unreachable_blocks ();
4107   df_analyze ();
4108   changed = one_pre_gcse_pass ();
4109   flag_rerun_cse_after_global_opts |= changed;
4110   if (changed)
4111     cleanup_cfg (0);
4112   return 0;
4113 }
4114 
4115 static bool
gate_rtl_hoist (void)
4117 {
4118   return optimize > 0 && flag_gcse
4119     && !cfun->calls_setjmp
    /* It does not make sense to run code hoisting unless we are optimizing
       for code size -- it rarely makes programs faster, and can make them
       bigger if we did PRE (when optimizing for space, we don't run PRE).  */
4123     && optimize_function_for_size_p (cfun)
4124     && dbg_cnt (hoist);
4125 }
4126 
4127 static unsigned int
execute_rtl_hoist (void)
4129 {
4130   int changed;
4131   delete_unreachable_blocks ();
4132   df_analyze ();
4133   changed = one_code_hoisting_pass ();
4134   flag_rerun_cse_after_global_opts |= changed;
4135   if (changed)
4136     cleanup_cfg (0);
4137   return 0;
4138 }
4139 
4140 struct rtl_opt_pass pass_rtl_pre =
4141 {
4142  {
4143   RTL_PASS,
4144   "rtl pre",                            /* name */
4145   OPTGROUP_NONE,                        /* optinfo_flags */
4146   gate_rtl_pre,                         /* gate */
4147   execute_rtl_pre,    			/* execute */
4148   NULL,                                 /* sub */
4149   NULL,                                 /* next */
4150   0,                                    /* static_pass_number */
4151   TV_PRE,                               /* tv_id */
4152   PROP_cfglayout,                       /* properties_required */
4153   0,                                    /* properties_provided */
4154   0,                                    /* properties_destroyed */
4155   0,                                    /* todo_flags_start */
4156   TODO_df_finish | TODO_verify_rtl_sharing |
4157   TODO_verify_flow | TODO_ggc_collect   /* todo_flags_finish */
4158  }
4159 };
4160 
4161 struct rtl_opt_pass pass_rtl_hoist =
4162 {
4163  {
4164   RTL_PASS,
4165   "hoist",                              /* name */
4166   OPTGROUP_NONE,                        /* optinfo_flags */
4167   gate_rtl_hoist,                       /* gate */
4168   execute_rtl_hoist,  			/* execute */
4169   NULL,                                 /* sub */
4170   NULL,                                 /* next */
4171   0,                                    /* static_pass_number */
4172   TV_HOIST,                             /* tv_id */
4173   PROP_cfglayout,                       /* properties_required */
4174   0,                                    /* properties_provided */
4175   0,                                    /* properties_destroyed */
4176   0,                                    /* todo_flags_start */
4177   TODO_df_finish | TODO_verify_rtl_sharing |
4178   TODO_verify_flow | TODO_ggc_collect   /* todo_flags_finish */
4179  }
4180 };
4181 
4182 #include "gt-gcse.h"
4183