xref: /dragonfly/contrib/gcc-4.7/gcc/ree.c (revision cfd1aba3)
1 /* Redundant Extension Elimination pass for the GNU compiler.
2    Copyright (C) 2010, 2011, 2012 Free Software Foundation, Inc.
3    Contributed by Ilya Enkovich (ilya.enkovich@intel.com)
4 
5    Based on the Redundant Zero-extension elimination pass contributed by
6    Sriraman Tallam (tmsriram@google.com) and Silvius Rus (rus@google.com).
7 
8 This file is part of GCC.
9 
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
14 
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
18 for more details.
19 
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3.  If not see
22 <http://www.gnu.org/licenses/>.  */
23 
24 
25 /* Problem Description :
26    --------------------
27    This pass is intended to remove redundant extension instructions.
28    Such instructions appear for different reasons.  We expect some of
29    them due to implicit zero-extension in 64-bit registers after writing
30    to their lower 32-bit half (e.g. for the x86-64 architecture).
31    Another possible reason is a type cast which follows a load (for
32    instance a register restore) and which can be combined into a single
33    instruction, and for which earlier local passes, e.g. the combiner,
34    weren't able to optimize.
35 
36    How does this pass work  ?
37    --------------------------
38 
39    This pass is run after register allocation.  Hence, all registers that
40    this pass deals with are hard registers.  This pass first looks for an
41    extension instruction that could possibly be redundant.  Such extension
42    instructions show up in RTL with the pattern  :
43    (set (reg:<SWI248> x) (any_extend:<SWI248> (reg:<SWI124> x))),
44    where x can be any hard register.
45    Now, this pass tries to eliminate this instruction by merging the
46    extension with the definitions of register x.  For instance, if
47    one of the definitions of register x was  :
48    (set (reg:SI x) (plus:SI (reg:SI z1) (reg:SI z2))),
49    followed by extension  :
50    (set (reg:DI x) (zero_extend:DI (reg:SI x)))
51    then the combination converts this into :
52    (set (reg:DI x) (zero_extend:DI (plus:SI (reg:SI z1) (reg:SI z2)))).
53    If all the merged definitions are recognizable assembly instructions,
54    the extension is effectively eliminated.
55 
56    For example, for the x86-64 architecture, implicit zero-extensions
57    are captured with appropriate patterns in the i386.md file.  Hence,
58    these merged definition can be matched to a single assembly instruction.
59    The original extension instruction is then deleted if all the
60    definitions can be merged.
61 
62    However, there are cases where the definition instruction cannot be
63    merged with an extension.  Examples are CALL instructions.  In such
64    cases, the original extension is not redundant and this pass does
65    not delete it.
66 
67    Handling conditional moves :
68    ----------------------------
69 
70    Architectures like x86-64 support conditional moves whose semantics for
71    extension differ from the other instructions.  For instance, the
72    instruction *cmov ebx, eax*
73    zero-extends eax onto rax only when the move from ebx to eax happens.
74    Otherwise, eax may not be zero-extended.  Consider conditional moves as
75    RTL instructions of the form
76    (set (reg:SI x) (if_then_else (cond) (reg:SI y) (reg:SI z))).
77    This pass tries to merge an extension with a conditional move by
78    actually merging the definitions of y and z with an extension and then
79    converting the conditional move into :
80    (set (reg:DI x) (if_then_else (cond) (reg:DI y) (reg:DI z))).
81    Since registers y and z are extended, register x will also be extended
82    after the conditional move.  Note that this step has to be done
83    transitively since the definition of a conditional copy can be
84    another conditional copy.
85 
86    Motivating Example I :
87    ---------------------
88    For this program :
89    **********************************************
90    bad_code.c
91 
92    int mask[1000];
93 
94    int foo(unsigned x)
95    {
96      if (x < 10)
97        x = x * 45;
98      else
99        x = x * 78;
100      return mask[x];
101    }
102    **********************************************
103 
104    $ gcc -O2 bad_code.c
105      ........
106      400315:       b8 4e 00 00 00          mov    $0x4e,%eax
107      40031a:       0f af f8                imul   %eax,%edi
108      40031d:       89 ff                   mov    %edi,%edi - useless extension
109      40031f:       8b 04 bd 60 19 40 00    mov    0x401960(,%rdi,4),%eax
110      400326:       c3                      retq
111      ......
112      400330:       ba 2d 00 00 00          mov    $0x2d,%edx
113      400335:       0f af fa                imul   %edx,%edi
114      400338:       89 ff                   mov    %edi,%edi - useless extension
115      40033a:       8b 04 bd 60 19 40 00    mov    0x401960(,%rdi,4),%eax
116      400341:       c3                      retq
117 
118    $ gcc -O2 -free bad_code.c
119      ......
120      400315:       6b ff 4e                imul   $0x4e,%edi,%edi
121      400318:       8b 04 bd 40 19 40 00    mov    0x401940(,%rdi,4),%eax
122      40031f:       c3                      retq
123      400320:       6b ff 2d                imul   $0x2d,%edi,%edi
124      400323:       8b 04 bd 40 19 40 00    mov    0x401940(,%rdi,4),%eax
125      40032a:       c3                      retq
126 
127    Motivating Example II :
128    ---------------------
129 
130    Here is an example with a conditional move.
131 
132    For this program :
133    **********************************************
134 
135    unsigned long long foo(unsigned x , unsigned y)
136    {
137      unsigned z;
138      if (x > 100)
139        z = x + y;
140      else
141        z = x - y;
142      return (unsigned long long)(z);
143    }
144 
145    $ gcc -O2 bad_code.c
146      ............
147      400360:       8d 14 3e                lea    (%rsi,%rdi,1),%edx
148      400363:       89 f8                   mov    %edi,%eax
149      400365:       29 f0                   sub    %esi,%eax
150      400367:       83 ff 65                cmp    $0x65,%edi
151      40036a:       0f 43 c2                cmovae %edx,%eax
152      40036d:       89 c0                   mov    %eax,%eax - useless extension
153      40036f:       c3                      retq
154 
155    $ gcc -O2 -free bad_code.c
156      .............
157      400360:       89 fa                   mov    %edi,%edx
158      400362:       8d 04 3e                lea    (%rsi,%rdi,1),%eax
159      400365:       29 f2                   sub    %esi,%edx
160      400367:       83 ff 65                cmp    $0x65,%edi
161      40036a:       89 d6                   mov    %edx,%esi
162      40036c:       48 0f 42 c6             cmovb  %rsi,%rax
163      400370:       c3                      retq
164 
165   Motivating Example III :
166   ---------------------
167 
168   Here is an example with a type cast.
169 
170   For this program :
171   **********************************************
172 
173   void test(int size, unsigned char *in, unsigned char *out)
174   {
175     int i;
176     unsigned char xr, xg, xy=0;
177 
178     for (i = 0; i < size; i++) {
179       xr = *in++;
180       xg = *in++;
181       xy = (unsigned char) ((19595*xr + 38470*xg) >> 16);
182       *out++ = xy;
183     }
184   }
185 
186   $ gcc -O2 bad_code.c
187     ............
188     10:   0f b6 0e                movzbl (%rsi),%ecx
189     13:   0f b6 46 01             movzbl 0x1(%rsi),%eax
190     17:   48 83 c6 02             add    $0x2,%rsi
191     1b:   0f b6 c9                movzbl %cl,%ecx - useless extension
192     1e:   0f b6 c0                movzbl %al,%eax - useless extension
193     21:   69 c9 8b 4c 00 00       imul   $0x4c8b,%ecx,%ecx
194     27:   69 c0 46 96 00 00       imul   $0x9646,%eax,%eax
195 
196    $ gcc -O2 -free bad_code.c
197      .............
198     10:   0f b6 0e                movzbl (%rsi),%ecx
199     13:   0f b6 46 01             movzbl 0x1(%rsi),%eax
200     17:   48 83 c6 02             add    $0x2,%rsi
201     1b:   69 c9 8b 4c 00 00       imul   $0x4c8b,%ecx,%ecx
202     21:   69 c0 46 96 00 00       imul   $0x9646,%eax,%eax
203 
204    Usefulness :
205    ----------
206 
207    The original redundant zero-extension elimination pass reported reduction
208    of the dynamic instruction count of a compression benchmark by 2.8% and
209    improvement of its run time by about 1%.
210 
211    The additional performance gain with the enhanced pass is mostly expected
212    on in-order architectures where redundancy cannot be compensated by out of
213    order execution.  Measurements showed up to 10% performance gain (reduced
214    run time) on EEMBC 2.0 benchmarks on Atom processor with geomean performance
215    gain 1%.  */
216 
217 
218 #include "config.h"
219 #include "system.h"
220 #include "coretypes.h"
221 #include "tm.h"
222 #include "rtl.h"
223 #include "tree.h"
224 #include "tm_p.h"
225 #include "flags.h"
226 #include "regs.h"
227 #include "hard-reg-set.h"
228 #include "basic-block.h"
229 #include "insn-config.h"
230 #include "function.h"
231 #include "expr.h"
232 #include "insn-attr.h"
233 #include "recog.h"
234 #include "diagnostic-core.h"
235 #include "target.h"
236 #include "timevar.h"
237 #include "optabs.h"
238 #include "insn-codes.h"
239 #include "rtlhooks-def.h"
240 /* Include output.h for dump_file.  */
241 #include "output.h"
242 #include "params.h"
243 #include "timevar.h"
244 #include "tree-pass.h"
245 #include "df.h"
246 #include "cgraph.h"
247 
248 /* This structure represents a candidate for elimination.  */
249 
250 typedef struct GTY(()) ext_cand
251 {
252   /* The expression.  */
253   const_rtx expr;
254 
255   /* The kind of extension.  */
256   enum rtx_code code;
257 
258   /* The destination mode.  */
259   enum machine_mode mode;
260 
261   /* The instruction where it lives.  */
262   rtx insn;
263 } ext_cand;
264 
265 DEF_VEC_O(ext_cand);
266 DEF_VEC_ALLOC_O(ext_cand, heap);
267 
268 static int max_insn_uid;
269 
270 /* Given a insn (CURR_INSN), an extension candidate for removal (CAND)
271    and a pointer to the SET rtx (ORIG_SET) that needs to be modified,
272    this code modifies the SET rtx to a new SET rtx that extends the
273    right hand expression into a register on the left hand side.  Note
274    that multiple assumptions are made about the nature of the set that
275    needs to be true for this to work and is called from merge_def_and_ext.
276 
277    Original :
278    (set (reg a) (expression))
279 
280    Transform :
281    (set (reg a) (any_extend (expression)))
282 
283    Special Cases :
284    If the expression is a constant or another extension, then directly
285    assign it to the register.  */
286 
287 static bool
288 combine_set_extension (ext_cand *cand, rtx curr_insn, rtx *orig_set)
289 {
290   rtx orig_src = SET_SRC (*orig_set);
291   rtx new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (*orig_set)));
292   rtx new_set;
293 
294   /* Merge constants by directly moving the constant into the register under
295      some conditions.  Recall that RTL constants are sign-extended.  */
296   if (GET_CODE (orig_src) == CONST_INT
297       && HOST_BITS_PER_WIDE_INT >= GET_MODE_BITSIZE (cand->mode))
298     {
299       if (INTVAL (orig_src) >= 0 || cand->code == SIGN_EXTEND)
300 	new_set = gen_rtx_SET (VOIDmode, new_reg, orig_src);
301       else
302 	{
303 	  /* Zero-extend the negative constant by masking out the bits outside
304 	     the source mode.  */
305 	  enum machine_mode src_mode = GET_MODE (SET_DEST (*orig_set));
306 	  rtx new_const_int
307 	    = GEN_INT (INTVAL (orig_src) & GET_MODE_MASK (src_mode));
308 	  new_set = gen_rtx_SET (VOIDmode, new_reg, new_const_int);
309 	}
310     }
311   else if (GET_MODE (orig_src) == VOIDmode)
312     {
313       /* This is mostly due to a call insn that should not be optimized.  */
314       return false;
315     }
316   else if (GET_CODE (orig_src) == cand->code)
317     {
318       /* Here is a sequence of two extensions.  Try to merge them.  */
319       rtx temp_extension
320 	= gen_rtx_fmt_e (cand->code, cand->mode, XEXP (orig_src, 0));
321       rtx simplified_temp_extension = simplify_rtx (temp_extension);
322       if (simplified_temp_extension)
323         temp_extension = simplified_temp_extension;
324       new_set = gen_rtx_SET (VOIDmode, new_reg, temp_extension);
325     }
326   else if (GET_CODE (orig_src) == IF_THEN_ELSE)
327     {
328       /* Only IF_THEN_ELSE of phi-type copies are combined.  Otherwise,
329          in general, IF_THEN_ELSE should not be combined.  */
330       return false;
331     }
332   else
333     {
334       /* This is the normal case.  */
335       rtx temp_extension
336 	= gen_rtx_fmt_e (cand->code, cand->mode, orig_src);
337       rtx simplified_temp_extension = simplify_rtx (temp_extension);
338       if (simplified_temp_extension)
339         temp_extension = simplified_temp_extension;
340       new_set = gen_rtx_SET (VOIDmode, new_reg, temp_extension);
341     }
342 
343   /* This change is a part of a group of changes.  Hence,
344      validate_change will not try to commit the change.  */
345   if (validate_change (curr_insn, orig_set, new_set, true))
346     {
347       if (dump_file)
348         {
349           fprintf (dump_file,
350 		   "Tentatively merged extension with definition:\n");
351           print_rtl_single (dump_file, curr_insn);
352         }
353       return true;
354     }
355 
356   return false;
357 }
358 
359 /* Treat if_then_else insns, where the operands of both branches
360    are registers, as copies.  For instance,
361    Original :
362    (set (reg:SI a) (if_then_else (cond) (reg:SI b) (reg:SI c)))
363    Transformed :
364    (set (reg:DI a) (if_then_else (cond) (reg:DI b) (reg:DI c)))
365    DEF_INSN is the if_then_else insn.  */
366 
367 static bool
368 transform_ifelse (ext_cand *cand, rtx def_insn)
369 {
370   rtx set_insn = PATTERN (def_insn);
371   rtx srcreg, dstreg, srcreg2;
372   rtx map_srcreg, map_dstreg, map_srcreg2;
373   rtx ifexpr;
374   rtx cond;
375   rtx new_set;
376 
377   gcc_assert (GET_CODE (set_insn) == SET);
378 
379   cond = XEXP (SET_SRC (set_insn), 0);
380   dstreg = SET_DEST (set_insn);
381   srcreg = XEXP (SET_SRC (set_insn), 1);
382   srcreg2 = XEXP (SET_SRC (set_insn), 2);
383   /* If the conditional move already has the right or wider mode,
384      there is nothing to do.  */
385   if (GET_MODE_SIZE (GET_MODE (dstreg)) >= GET_MODE_SIZE (cand->mode))
386     return true;
387 
388   map_srcreg = gen_rtx_REG (cand->mode, REGNO (srcreg));
389   map_srcreg2 = gen_rtx_REG (cand->mode, REGNO (srcreg2));
390   map_dstreg = gen_rtx_REG (cand->mode, REGNO (dstreg));
391   ifexpr = gen_rtx_IF_THEN_ELSE (cand->mode, cond, map_srcreg, map_srcreg2);
392   new_set = gen_rtx_SET (VOIDmode, map_dstreg, ifexpr);
393 
394   if (validate_change (def_insn, &PATTERN (def_insn), new_set, true))
395     {
396       if (dump_file)
397         {
398           fprintf (dump_file,
399 		   "Mode of conditional move instruction extended:\n");
400           print_rtl_single (dump_file, def_insn);
401         }
402       return true;
403     }
404 
405   return false;
406 }
407 
408 /* Get all the reaching definitions of an instruction.  The definitions are
409    desired for REG used in INSN.  Return the definition list or NULL if a
410    definition is missing.  If DEST is non-NULL, additionally push the INSN
411    of the definitions onto DEST.  */
412 
413 static struct df_link *
414 get_defs (rtx insn, rtx reg, VEC (rtx,heap) **dest)
415 {
416   df_ref reg_info, *uses;
417   struct df_link *ref_chain, *ref_link;
418 
419   reg_info = NULL;
420 
421   for (uses = DF_INSN_USES (insn); *uses; uses++)
422     {
423       reg_info = *uses;
424       if (GET_CODE (DF_REF_REG (reg_info)) == SUBREG)
425         return NULL;
426       if (REGNO (DF_REF_REG (reg_info)) == REGNO (reg))
427         break;
428     }
429 
430   gcc_assert (reg_info != NULL && uses != NULL);
431 
432   ref_chain = DF_REF_CHAIN (reg_info);
433 
434   for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
435     {
436       /* Problem getting some definition for this instruction.  */
437       if (ref_link->ref == NULL)
438         return NULL;
439       if (DF_REF_INSN_INFO (ref_link->ref) == NULL)
440         return NULL;
441     }
442 
443   if (dest)
444     for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
445       VEC_safe_push (rtx, heap, *dest, DF_REF_INSN (ref_link->ref));
446 
447   return ref_chain;
448 }
449 
450 /* Return true if INSN is
451      (SET (reg REGNO (def_reg)) (if_then_else (cond) (REG x1) (REG x2)))
452    and store x1 and x2 in REG_1 and REG_2.  */
453 
454 static bool
455 is_cond_copy_insn (rtx insn, rtx *reg1, rtx *reg2)
456 {
457   rtx expr = single_set (insn);
458 
459   if (expr != NULL_RTX
460       && GET_CODE (expr) == SET
461       && GET_CODE (SET_DEST (expr)) == REG
462       && GET_CODE (SET_SRC (expr))  == IF_THEN_ELSE
463       && GET_CODE (XEXP (SET_SRC (expr), 1)) == REG
464       && GET_CODE (XEXP (SET_SRC (expr), 2)) == REG)
465     {
466       *reg1 = XEXP (SET_SRC (expr), 1);
467       *reg2 = XEXP (SET_SRC (expr), 2);
468       return true;
469     }
470 
471   return false;
472 }
473 
474 enum ext_modified_kind
475 {
476   /* The insn hasn't been modified by ree pass yet.  */
477   EXT_MODIFIED_NONE,
478   /* Changed into zero extension.  */
479   EXT_MODIFIED_ZEXT,
480   /* Changed into sign extension.  */
481   EXT_MODIFIED_SEXT
482 };
483 
484 struct ext_modified
485 {
486   /* Mode from which ree has zero or sign extended the destination.  */
487   ENUM_BITFIELD(machine_mode) mode : 8;
488 
489   /* Kind of modification of the insn.  */
490   ENUM_BITFIELD(ext_modified_kind) kind : 2;
491 
492   /* True if the insn is scheduled to be deleted.  */
493   unsigned int deleted : 1;
494 };
495 
496 /* Vectors used by combine_reaching_defs and its helpers.  */
497 typedef struct ext_state
498 {
499   /* In order to avoid constant VEC_alloc/VEC_free, we keep these
500      4 vectors live through the entire find_and_remove_re and just
501      VEC_truncate them each time.  */
502   VEC (rtx, heap) *defs_list;
503   VEC (rtx, heap) *copies_list;
504   VEC (rtx, heap) *modified_list;
505   VEC (rtx, heap) *work_list;
506 
507   /* For instructions that have been successfully modified, this is
508      the original mode from which the insn is extending and
509      kind of extension.  */
510   struct ext_modified *modified;
511 } ext_state;
512 
513 /* Reaching Definitions of the extended register could be conditional copies
514    or regular definitions.  This function separates the two types into two
515    lists, STATE->DEFS_LIST and STATE->COPIES_LIST.  This is necessary because,
516    if a reaching definition is a conditional copy, merging the extension with
517    this definition is wrong.  Conditional copies are merged by transitively
518    merging their definitions.  The defs_list is populated with all the reaching
519    definitions of the extension instruction (EXTEND_INSN) which must be merged
520    with an extension.  The copies_list contains all the conditional moves that
521    will later be extended into a wider mode conditional move if all the merges
522    are successful.  The function returns false upon failure, true upon
523    success.  */
524 
525 static bool
526 make_defs_and_copies_lists (rtx extend_insn, const_rtx set_pat,
527 			    ext_state *state)
528 {
529   rtx src_reg = XEXP (SET_SRC (set_pat), 0);
530   bool *is_insn_visited;
531   bool ret = true;
532 
533   VEC_truncate (rtx, state->work_list, 0);
534 
535   /* Initialize the work list.  */
536   if (!get_defs (extend_insn, src_reg, &state->work_list))
537     gcc_unreachable ();
538 
539   is_insn_visited = XCNEWVEC (bool, max_insn_uid);
540 
541   /* Perform transitive closure for conditional copies.  */
542   while (!VEC_empty (rtx, state->work_list))
543     {
544       rtx def_insn = VEC_pop (rtx, state->work_list);
545       rtx reg1, reg2;
546 
547       gcc_assert (INSN_UID (def_insn) < max_insn_uid);
548 
549       if (is_insn_visited[INSN_UID (def_insn)])
550 	continue;
551       is_insn_visited[INSN_UID (def_insn)] = true;
552 
553       if (is_cond_copy_insn (def_insn, &reg1, &reg2))
554 	{
555 	  /* Push it onto the copy list first.  */
556 	  VEC_safe_push (rtx, heap, state->copies_list, def_insn);
557 
558 	  /* Now perform the transitive closure.  */
559 	  if (!get_defs (def_insn, reg1, &state->work_list)
560 	      || !get_defs (def_insn, reg2, &state->work_list))
561 	    {
562 	      ret = false;
563 	      break;
564 	    }
565         }
566       else
567 	VEC_safe_push (rtx, heap, state->defs_list, def_insn);
568     }
569 
570   XDELETEVEC (is_insn_visited);
571 
572   return ret;
573 }
574 
575 /* Merge the DEF_INSN with an extension.  Calls combine_set_extension
576    on the SET pattern.  */
577 
578 static bool
579 merge_def_and_ext (ext_cand *cand, rtx def_insn, ext_state *state)
580 {
581   enum machine_mode ext_src_mode;
582   enum rtx_code code;
583   rtx *sub_rtx;
584   rtx s_expr;
585   int i;
586 
587   ext_src_mode = GET_MODE (XEXP (SET_SRC (cand->expr), 0));
588   code = GET_CODE (PATTERN (def_insn));
589   sub_rtx = NULL;
590 
591   if (code == PARALLEL)
592     {
593       for (i = 0; i < XVECLEN (PATTERN (def_insn), 0); i++)
594         {
595           s_expr = XVECEXP (PATTERN (def_insn), 0, i);
596           if (GET_CODE (s_expr) != SET)
597             continue;
598 
599           if (sub_rtx == NULL)
600             sub_rtx = &XVECEXP (PATTERN (def_insn), 0, i);
601           else
602             {
603               /* PARALLEL with multiple SETs.  */
604               return false;
605             }
606         }
607     }
608   else if (code == SET)
609     sub_rtx = &PATTERN (def_insn);
610   else
611     {
612       /* It is not a PARALLEL or a SET, what could it be ? */
613       return false;
614     }
615 
616   gcc_assert (sub_rtx != NULL);
617 
618   if (REG_P (SET_DEST (*sub_rtx))
619       && (GET_MODE (SET_DEST (*sub_rtx)) == ext_src_mode
620 	  || ((state->modified[INSN_UID (def_insn)].kind
621 	       == (cand->code == ZERO_EXTEND
622 		   ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT))
623 	      && state->modified[INSN_UID (def_insn)].mode
624 		 == ext_src_mode)))
625     {
626       if (GET_MODE_SIZE (GET_MODE (SET_DEST (*sub_rtx)))
627 	  >= GET_MODE_SIZE (cand->mode))
628 	return true;
629       /* If def_insn is already scheduled to be deleted, don't attempt
630 	 to modify it.  */
631       if (state->modified[INSN_UID (def_insn)].deleted)
632 	return false;
633       if (combine_set_extension (cand, def_insn, sub_rtx))
634 	{
635 	  if (state->modified[INSN_UID (def_insn)].kind == EXT_MODIFIED_NONE)
636 	    state->modified[INSN_UID (def_insn)].mode = ext_src_mode;
637 	  return true;
638 	}
639     }
640 
641   return false;
642 }
643 
644 /* This function goes through all reaching defs of the source
645    of the candidate for elimination (CAND) and tries to combine
646    the extension with the definition instruction.  The changes
647    are made as a group so that even if one definition cannot be
648    merged, all reaching definitions end up not being merged.
649    When a conditional copy is encountered, merging is attempted
650    transitively on its definitions.  It returns true upon success
651    and false upon failure.  */
652 
653 static bool
654 combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
655 {
656   rtx def_insn;
657   bool merge_successful = true;
658   int i;
659   int defs_ix;
660   bool outcome;
661 
662   VEC_truncate (rtx, state->defs_list, 0);
663   VEC_truncate (rtx, state->copies_list, 0);
664 
665   outcome = make_defs_and_copies_lists (cand->insn, set_pat, state);
666 
667   if (!outcome)
668     return false;
669 
670   /* If cand->insn has been already modified, update cand->mode to a wider
671      mode if possible, or punt.  */
672   if (state->modified[INSN_UID (cand->insn)].kind != EXT_MODIFIED_NONE)
673     {
674       enum machine_mode mode;
675       rtx set;
676 
677       if (state->modified[INSN_UID (cand->insn)].kind
678 	  != (cand->code == ZERO_EXTEND
679 	      ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT)
680 	  || state->modified[INSN_UID (cand->insn)].mode != cand->mode
681 	  || (set = single_set (cand->insn)) == NULL_RTX)
682 	return false;
683       mode = GET_MODE (SET_DEST (set));
684       gcc_assert (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (cand->mode));
685       cand->mode = mode;
686     }
687 
688   merge_successful = true;
689 
690   /* Go through the defs vector and try to merge all the definitions
691      in this vector.  */
692   VEC_truncate (rtx, state->modified_list, 0);
693   FOR_EACH_VEC_ELT (rtx, state->defs_list, defs_ix, def_insn)
694     {
695       if (merge_def_and_ext (cand, def_insn, state))
696 	VEC_safe_push (rtx, heap, state->modified_list, def_insn);
697       else
698         {
699           merge_successful = false;
700           break;
701         }
702     }
703 
704   /* Now go through the conditional copies vector and try to merge all
705      the copies in this vector.  */
706   if (merge_successful)
707     {
708       FOR_EACH_VEC_ELT (rtx, state->copies_list, i, def_insn)
709         {
710           if (transform_ifelse (cand, def_insn))
711 	    VEC_safe_push (rtx, heap, state->modified_list, def_insn);
712           else
713             {
714               merge_successful = false;
715               break;
716             }
717         }
718     }
719 
720   if (merge_successful)
721     {
722       /* Commit the changes here if possible
723 	 FIXME: It's an all-or-nothing scenario.  Even if only one definition
724 	 cannot be merged, we entirely give up.  In the future, we should allow
725 	 extensions to be partially eliminated along those paths where the
726 	 definitions could be merged.  */
727       if (apply_change_group ())
728         {
729           if (dump_file)
730             fprintf (dump_file, "All merges were successful.\n");
731 
732 	  FOR_EACH_VEC_ELT (rtx, state->modified_list, i, def_insn)
733 	    if (state->modified[INSN_UID (def_insn)].kind == EXT_MODIFIED_NONE)
734 	      state->modified[INSN_UID (def_insn)].kind
735 		= (cand->code == ZERO_EXTEND
736 		   ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT);
737 
738           return true;
739         }
740       else
741         {
742           /* Changes need not be cancelled explicitly as apply_change_group
743              does it.  Print list of definitions in the dump_file for debug
744              purposes.  This extension cannot be deleted.  */
745           if (dump_file)
746             {
747 	      fprintf (dump_file,
748 		       "Merge cancelled, non-mergeable definitions:\n");
749 	      FOR_EACH_VEC_ELT (rtx, state->modified_list, i, def_insn)
750 	        print_rtl_single (dump_file, def_insn);
751             }
752         }
753     }
754   else
755     {
756       /* Cancel any changes that have been made so far.  */
757       cancel_changes (0);
758     }
759 
760   return false;
761 }
762 
763 /* Add an extension pattern that could be eliminated.  */
764 
765 static void
766 add_removable_extension (const_rtx expr, rtx insn,
767 			 VEC (ext_cand, heap) **insn_list,
768 			 unsigned *def_map)
769 {
770   enum rtx_code code;
771   enum machine_mode mode;
772   unsigned int idx;
773   rtx src, dest;
774 
775   /* We are looking for SET (REG N) (ANY_EXTEND (REG N)).  */
776   if (GET_CODE (expr) != SET)
777     return;
778 
779   src = SET_SRC (expr);
780   code = GET_CODE (src);
781   dest = SET_DEST (expr);
782   mode = GET_MODE (dest);
783 
784   if (REG_P (dest)
785       && (code == SIGN_EXTEND || code == ZERO_EXTEND)
786       && REG_P (XEXP (src, 0))
787       && REGNO (dest) == REGNO (XEXP (src, 0)))
788     {
789       struct df_link *defs, *def;
790       ext_cand *cand;
791 
792       /* First, make sure we can get all the reaching definitions.  */
793       defs = get_defs (insn, XEXP (src, 0), NULL);
794       if (!defs)
795 	{
796 	  if (dump_file)
797 	    {
798 	      fprintf (dump_file, "Cannot eliminate extension:\n");
799 	      print_rtl_single (dump_file, insn);
800 	      fprintf (dump_file, " because of missing definition(s)\n");
801 	    }
802 	  return;
803 	}
804 
805       /* Second, make sure the reaching definitions don't feed another and
806 	 different extension.  FIXME: this obviously can be improved.  */
807       for (def = defs; def; def = def->next)
808 	if ((idx = def_map[INSN_UID(DF_REF_INSN (def->ref))])
809 	    && (cand = VEC_index (ext_cand, *insn_list, idx - 1))
810 	    && (cand->code != code || cand->mode != mode))
811 	  {
812 	    if (dump_file)
813 	      {
814 	        fprintf (dump_file, "Cannot eliminate extension:\n");
815 		print_rtl_single (dump_file, insn);
816 	        fprintf (dump_file, " because of other extension\n");
817 	      }
818 	    return;
819 	  }
820 
821       /* Then add the candidate to the list and insert the reaching definitions
822          into the definition map.  */
823       cand = VEC_safe_push (ext_cand, heap, *insn_list, NULL);
824       cand->expr = expr;
825       cand->code = code;
826       cand->mode = mode;
827       cand->insn = insn;
828       idx = VEC_length (ext_cand, *insn_list);
829 
830       for (def = defs; def; def = def->next)
831 	def_map[INSN_UID(DF_REF_INSN (def->ref))] = idx;
832     }
833 }
834 
835 /* Traverse the instruction stream looking for extensions and return the
836    list of candidates.  */
837 
838 static VEC (ext_cand, heap)*
839 find_removable_extensions (void)
840 {
841   VEC (ext_cand, heap) *insn_list = NULL;
842   basic_block bb;
843   rtx insn, set;
844   unsigned *def_map = XCNEWVEC (unsigned, max_insn_uid);
845 
846   FOR_EACH_BB (bb)
847     FOR_BB_INSNS (bb, insn)
848       {
849 	if (!NONDEBUG_INSN_P (insn))
850 	  continue;
851 
852 	set = single_set (insn);
853 	if (set == NULL_RTX)
854 	  continue;
855 	add_removable_extension (set, insn, &insn_list, def_map);
856       }
857 
858   XDELETEVEC (def_map);
859 
860   return insn_list;
861 }
862 
863 /* This is the main function that checks the insn stream for redundant
864    extensions and tries to remove them if possible.  */
865 
866 static void
867 find_and_remove_re (void)
868 {
869   ext_cand *curr_cand;
870   rtx curr_insn = NULL_RTX;
871   int num_re_opportunities = 0, num_realized = 0, i;
872   VEC (ext_cand, heap) *reinsn_list;
873   VEC (rtx, heap) *reinsn_del_list;
874   ext_state state;
875 
876   /* Construct DU chain to get all reaching definitions of each
877      extension instruction.  */
878   df_chain_add_problem (DF_UD_CHAIN + DF_DU_CHAIN);
879   df_analyze ();
880   df_set_flags (DF_DEFER_INSN_RESCAN);
881 
882   max_insn_uid = get_max_uid ();
883   reinsn_del_list = NULL;
884   reinsn_list = find_removable_extensions ();
885   state.defs_list = NULL;
886   state.copies_list = NULL;
887   state.modified_list = NULL;
888   state.work_list = NULL;
889   if (VEC_empty (ext_cand, reinsn_list))
890     state.modified = NULL;
891   else
892     state.modified = XCNEWVEC (struct ext_modified, max_insn_uid);
893 
894   FOR_EACH_VEC_ELT (ext_cand, reinsn_list, i, curr_cand)
895     {
896       num_re_opportunities++;
897 
898       /* Try to combine the extension with the definition.  */
899       if (dump_file)
900         {
901           fprintf (dump_file, "Trying to eliminate extension:\n");
902           print_rtl_single (dump_file, curr_cand->insn);
903         }
904 
905       if (combine_reaching_defs (curr_cand, curr_cand->expr, &state))
906         {
907           if (dump_file)
908             fprintf (dump_file, "Eliminated the extension.\n");
909           num_realized++;
910           VEC_safe_push (rtx, heap, reinsn_del_list, curr_cand->insn);
911 	  state.modified[INSN_UID (curr_cand->insn)].deleted = 1;
912         }
913     }
914 
915   /* Delete all useless extensions here in one sweep.  */
916   FOR_EACH_VEC_ELT (rtx, reinsn_del_list, i, curr_insn)
917     delete_insn (curr_insn);
918 
919   VEC_free (ext_cand, heap, reinsn_list);
920   VEC_free (rtx, heap, reinsn_del_list);
921   VEC_free (rtx, heap, state.defs_list);
922   VEC_free (rtx, heap, state.copies_list);
923   VEC_free (rtx, heap, state.modified_list);
924   VEC_free (rtx, heap, state.work_list);
925   XDELETEVEC (state.modified);
926 
927   if (dump_file && num_re_opportunities > 0)
928     fprintf (dump_file, "Elimination opportunities = %d realized = %d\n",
929 	     num_re_opportunities, num_realized);
930 
931   df_finish_pass (false);
932 }
933 
934 /* Find and remove redundant extensions.  */
935 
936 static unsigned int
937 rest_of_handle_ree (void)
938 {
939   timevar_push (TV_REE);
940   find_and_remove_re ();
941   timevar_pop (TV_REE);
942   return 0;
943 }
944 
945 /* Run REE pass when flag_ree is set at optimization level > 0.  */
946 
947 static bool
948 gate_handle_ree (void)
949 {
950   return (optimize > 0 && flag_ree);
951 }
952 
953 struct rtl_opt_pass pass_ree =
954 {
955  {
956   RTL_PASS,
957   "ree",                                /* name */
958   gate_handle_ree,                      /* gate */
959   rest_of_handle_ree,                   /* execute */
960   NULL,                                 /* sub */
961   NULL,                                 /* next */
962   0,                                    /* static_pass_number */
963   TV_REE,                               /* tv_id */
964   0,                                    /* properties_required */
965   0,                                    /* properties_provided */
966   0,                                    /* properties_destroyed */
967   0,                                    /* todo_flags_start */
968   TODO_ggc_collect |
969   TODO_verify_rtl_sharing,              /* todo_flags_finish */
970  }
971 };
972