1 /* Redundant Extension Elimination pass for the GNU compiler.
2    Copyright (C) 2010-2013 Free Software Foundation, Inc.
3    Contributed by Ilya Enkovich (ilya.enkovich@intel.com)
4 
5    Based on the Redundant Zero-extension elimination pass contributed by
6    Sriraman Tallam (tmsriram@google.com) and Silvius Rus (rus@google.com).
7 
8 This file is part of GCC.
9 
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
14 
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
18 for more details.
19 
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3.  If not see
22 <http://www.gnu.org/licenses/>.  */
23 
24 
25 /* Problem Description :
26    --------------------
27    This pass is intended to remove redundant extension instructions.
28    Such instructions appear for different reasons.  We expect some of
29    them due to implicit zero-extension in 64-bit registers after writing
30    to their lower 32-bit half (e.g. for the x86-64 architecture).
31    Another possible reason is a type cast which follows a load (for
32    instance a register restore) and which can be combined into a single
33    instruction, and for which earlier local passes, e.g. the combiner,
34    weren't able to optimize.
35 
36    How does this pass work  ?
37    --------------------------
38 
39    This pass is run after register allocation.  Hence, all registers that
40    this pass deals with are hard registers.  This pass first looks for an
41    extension instruction that could possibly be redundant.  Such extension
42    instructions show up in RTL with the pattern  :
43    (set (reg:<SWI248> x) (any_extend:<SWI248> (reg:<SWI124> x))),
44    where x can be any hard register.
45    Now, this pass tries to eliminate this instruction by merging the
46    extension with the definitions of register x.  For instance, if
47    one of the definitions of register x was  :
48    (set (reg:SI x) (plus:SI (reg:SI z1) (reg:SI z2))),
49    followed by extension  :
50    (set (reg:DI x) (zero_extend:DI (reg:SI x)))
51    then the combination converts this into :
52    (set (reg:DI x) (zero_extend:DI (plus:SI (reg:SI z1) (reg:SI z2)))).
53    If all the merged definitions are recognizable assembly instructions,
54    the extension is effectively eliminated.
55 
56    For example, for the x86-64 architecture, implicit zero-extensions
57    are captured with appropriate patterns in the i386.md file.  Hence,
58    these merged definition can be matched to a single assembly instruction.
59    The original extension instruction is then deleted if all the
60    definitions can be merged.
61 
62    However, there are cases where the definition instruction cannot be
63    merged with an extension.  Examples are CALL instructions.  In such
64    cases, the original extension is not redundant and this pass does
65    not delete it.
66 
67    Handling conditional moves :
68    ----------------------------
69 
70    Architectures like x86-64 support conditional moves whose semantics for
71    extension differ from the other instructions.  For instance, the
72    instruction *cmov ebx, eax*
73    zero-extends eax onto rax only when the move from ebx to eax happens.
74    Otherwise, eax may not be zero-extended.  Consider conditional moves as
75    RTL instructions of the form
76    (set (reg:SI x) (if_then_else (cond) (reg:SI y) (reg:SI z))).
77    This pass tries to merge an extension with a conditional move by
78    actually merging the definitions of y and z with an extension and then
79    converting the conditional move into :
80    (set (reg:DI x) (if_then_else (cond) (reg:DI y) (reg:DI z))).
81    Since registers y and z are extended, register x will also be extended
82    after the conditional move.  Note that this step has to be done
83    transitively since the definition of a conditional copy can be
84    another conditional copy.
85 
86    Motivating Example I :
87    ---------------------
88    For this program :
89    **********************************************
90    bad_code.c
91 
92    int mask[1000];
93 
94    int foo(unsigned x)
95    {
96      if (x < 10)
97        x = x * 45;
98      else
99        x = x * 78;
100      return mask[x];
101    }
102    **********************************************
103 
104    $ gcc -O2 bad_code.c
105      ........
106      400315:       b8 4e 00 00 00          mov    $0x4e,%eax
107      40031a:       0f af f8                imul   %eax,%edi
108      40031d:       89 ff                   mov    %edi,%edi - useless extension
109      40031f:       8b 04 bd 60 19 40 00    mov    0x401960(,%rdi,4),%eax
110      400326:       c3                      retq
111      ......
112      400330:       ba 2d 00 00 00          mov    $0x2d,%edx
113      400335:       0f af fa                imul   %edx,%edi
114      400338:       89 ff                   mov    %edi,%edi - useless extension
115      40033a:       8b 04 bd 60 19 40 00    mov    0x401960(,%rdi,4),%eax
116      400341:       c3                      retq
117 
118    $ gcc -O2 -free bad_code.c
119      ......
120      400315:       6b ff 4e                imul   $0x4e,%edi,%edi
121      400318:       8b 04 bd 40 19 40 00    mov    0x401940(,%rdi,4),%eax
122      40031f:       c3                      retq
123      400320:       6b ff 2d                imul   $0x2d,%edi,%edi
124      400323:       8b 04 bd 40 19 40 00    mov    0x401940(,%rdi,4),%eax
125      40032a:       c3                      retq
126 
127    Motivating Example II :
128    ---------------------
129 
130    Here is an example with a conditional move.
131 
132    For this program :
133    **********************************************
134 
135    unsigned long long foo(unsigned x , unsigned y)
136    {
137      unsigned z;
138      if (x > 100)
139        z = x + y;
140      else
141        z = x - y;
142      return (unsigned long long)(z);
143    }
144 
145    $ gcc -O2 bad_code.c
146      ............
147      400360:       8d 14 3e                lea    (%rsi,%rdi,1),%edx
148      400363:       89 f8                   mov    %edi,%eax
149      400365:       29 f0                   sub    %esi,%eax
150      400367:       83 ff 65                cmp    $0x65,%edi
151      40036a:       0f 43 c2                cmovae %edx,%eax
152      40036d:       89 c0                   mov    %eax,%eax - useless extension
153      40036f:       c3                      retq
154 
155    $ gcc -O2 -free bad_code.c
156      .............
157      400360:       89 fa                   mov    %edi,%edx
158      400362:       8d 04 3e                lea    (%rsi,%rdi,1),%eax
159      400365:       29 f2                   sub    %esi,%edx
160      400367:       83 ff 65                cmp    $0x65,%edi
161      40036a:       89 d6                   mov    %edx,%esi
162      40036c:       48 0f 42 c6             cmovb  %rsi,%rax
163      400370:       c3                      retq
164 
165   Motivating Example III :
166   ---------------------
167 
168   Here is an example with a type cast.
169 
170   For this program :
171   **********************************************
172 
173   void test(int size, unsigned char *in, unsigned char *out)
174   {
175     int i;
176     unsigned char xr, xg, xy=0;
177 
178     for (i = 0; i < size; i++) {
179       xr = *in++;
180       xg = *in++;
181       xy = (unsigned char) ((19595*xr + 38470*xg) >> 16);
182       *out++ = xy;
183     }
184   }
185 
186   $ gcc -O2 bad_code.c
187     ............
188     10:   0f b6 0e                movzbl (%rsi),%ecx
189     13:   0f b6 46 01             movzbl 0x1(%rsi),%eax
190     17:   48 83 c6 02             add    $0x2,%rsi
191     1b:   0f b6 c9                movzbl %cl,%ecx - useless extension
192     1e:   0f b6 c0                movzbl %al,%eax - useless extension
193     21:   69 c9 8b 4c 00 00       imul   $0x4c8b,%ecx,%ecx
194     27:   69 c0 46 96 00 00       imul   $0x9646,%eax,%eax
195 
196    $ gcc -O2 -free bad_code.c
197      .............
198     10:   0f b6 0e                movzbl (%rsi),%ecx
199     13:   0f b6 46 01             movzbl 0x1(%rsi),%eax
200     17:   48 83 c6 02             add    $0x2,%rsi
201     1b:   69 c9 8b 4c 00 00       imul   $0x4c8b,%ecx,%ecx
202     21:   69 c0 46 96 00 00       imul   $0x9646,%eax,%eax
203 
204    Usefulness :
205    ----------
206 
207    The original redundant zero-extension elimination pass reported reduction
208    of the dynamic instruction count of a compression benchmark by 2.8% and
209    improvement of its run time by about 1%.
210 
211    The additional performance gain with the enhanced pass is mostly expected
212    on in-order architectures where redundancy cannot be compensated by out of
213    order execution.  Measurements showed up to 10% performance gain (reduced
214    run time) on EEMBC 2.0 benchmarks on Atom processor with geomean performance
215    gain 1%.  */
216 
217 
218 #include "config.h"
219 #include "system.h"
220 #include "coretypes.h"
221 #include "tm.h"
222 #include "rtl.h"
223 #include "tree.h"
224 #include "tm_p.h"
225 #include "flags.h"
226 #include "regs.h"
227 #include "hard-reg-set.h"
228 #include "basic-block.h"
229 #include "insn-config.h"
230 #include "function.h"
231 #include "expr.h"
232 #include "insn-attr.h"
233 #include "recog.h"
234 #include "diagnostic-core.h"
235 #include "target.h"
236 #include "optabs.h"
237 #include "insn-codes.h"
238 #include "rtlhooks-def.h"
239 #include "params.h"
240 #include "tree-pass.h"
241 #include "df.h"
242 #include "cgraph.h"
243 
244 /* This structure represents a candidate for elimination.  */
245 
246 typedef struct ext_cand
247 {
248   /* The expression.  */
249   const_rtx expr;
250 
251   /* The kind of extension.  */
252   enum rtx_code code;
253 
254   /* The destination mode.  */
255   enum machine_mode mode;
256 
257   /* The instruction where it lives.  */
258   rtx insn;
259 } ext_cand;
260 
261 
262 static int max_insn_uid;
263 
264 /* Given a insn (CURR_INSN), an extension candidate for removal (CAND)
265    and a pointer to the SET rtx (ORIG_SET) that needs to be modified,
266    this code modifies the SET rtx to a new SET rtx that extends the
267    right hand expression into a register on the left hand side.  Note
268    that multiple assumptions are made about the nature of the set that
269    needs to be true for this to work and is called from merge_def_and_ext.
270 
271    Original :
272    (set (reg a) (expression))
273 
274    Transform :
275    (set (reg a) (any_extend (expression)))
276 
277    Special Cases :
278    If the expression is a constant or another extension, then directly
279    assign it to the register.  */
280 
281 static bool
combine_set_extension(ext_cand * cand,rtx curr_insn,rtx * orig_set)282 combine_set_extension (ext_cand *cand, rtx curr_insn, rtx *orig_set)
283 {
284   rtx orig_src = SET_SRC (*orig_set);
285   rtx new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (*orig_set)));
286   rtx new_set;
287 
288   /* Merge constants by directly moving the constant into the register under
289      some conditions.  Recall that RTL constants are sign-extended.  */
290   if (GET_CODE (orig_src) == CONST_INT
291       && HOST_BITS_PER_WIDE_INT >= GET_MODE_BITSIZE (cand->mode))
292     {
293       if (INTVAL (orig_src) >= 0 || cand->code == SIGN_EXTEND)
294 	new_set = gen_rtx_SET (VOIDmode, new_reg, orig_src);
295       else
296 	{
297 	  /* Zero-extend the negative constant by masking out the bits outside
298 	     the source mode.  */
299 	  enum machine_mode src_mode = GET_MODE (SET_DEST (*orig_set));
300 	  rtx new_const_int
301 	    = GEN_INT (INTVAL (orig_src) & GET_MODE_MASK (src_mode));
302 	  new_set = gen_rtx_SET (VOIDmode, new_reg, new_const_int);
303 	}
304     }
305   else if (GET_MODE (orig_src) == VOIDmode)
306     {
307       /* This is mostly due to a call insn that should not be optimized.  */
308       return false;
309     }
310   else if (GET_CODE (orig_src) == cand->code)
311     {
312       /* Here is a sequence of two extensions.  Try to merge them.  */
313       rtx temp_extension
314 	= gen_rtx_fmt_e (cand->code, cand->mode, XEXP (orig_src, 0));
315       rtx simplified_temp_extension = simplify_rtx (temp_extension);
316       if (simplified_temp_extension)
317         temp_extension = simplified_temp_extension;
318       new_set = gen_rtx_SET (VOIDmode, new_reg, temp_extension);
319     }
320   else if (GET_CODE (orig_src) == IF_THEN_ELSE)
321     {
322       /* Only IF_THEN_ELSE of phi-type copies are combined.  Otherwise,
323          in general, IF_THEN_ELSE should not be combined.  */
324       return false;
325     }
326   else
327     {
328       /* This is the normal case.  */
329       rtx temp_extension
330 	= gen_rtx_fmt_e (cand->code, cand->mode, orig_src);
331       rtx simplified_temp_extension = simplify_rtx (temp_extension);
332       if (simplified_temp_extension)
333         temp_extension = simplified_temp_extension;
334       new_set = gen_rtx_SET (VOIDmode, new_reg, temp_extension);
335     }
336 
337   /* This change is a part of a group of changes.  Hence,
338      validate_change will not try to commit the change.  */
339   if (validate_change (curr_insn, orig_set, new_set, true))
340     {
341       if (dump_file)
342         {
343           fprintf (dump_file,
344 		   "Tentatively merged extension with definition:\n");
345           print_rtl_single (dump_file, curr_insn);
346         }
347       return true;
348     }
349 
350   return false;
351 }
352 
353 /* Treat if_then_else insns, where the operands of both branches
354    are registers, as copies.  For instance,
355    Original :
356    (set (reg:SI a) (if_then_else (cond) (reg:SI b) (reg:SI c)))
357    Transformed :
358    (set (reg:DI a) (if_then_else (cond) (reg:DI b) (reg:DI c)))
359    DEF_INSN is the if_then_else insn.  */
360 
361 static bool
transform_ifelse(ext_cand * cand,rtx def_insn)362 transform_ifelse (ext_cand *cand, rtx def_insn)
363 {
364   rtx set_insn = PATTERN (def_insn);
365   rtx srcreg, dstreg, srcreg2;
366   rtx map_srcreg, map_dstreg, map_srcreg2;
367   rtx ifexpr;
368   rtx cond;
369   rtx new_set;
370 
371   gcc_assert (GET_CODE (set_insn) == SET);
372 
373   cond = XEXP (SET_SRC (set_insn), 0);
374   dstreg = SET_DEST (set_insn);
375   srcreg = XEXP (SET_SRC (set_insn), 1);
376   srcreg2 = XEXP (SET_SRC (set_insn), 2);
377   /* If the conditional move already has the right or wider mode,
378      there is nothing to do.  */
379   if (GET_MODE_SIZE (GET_MODE (dstreg)) >= GET_MODE_SIZE (cand->mode))
380     return true;
381 
382   map_srcreg = gen_rtx_REG (cand->mode, REGNO (srcreg));
383   map_srcreg2 = gen_rtx_REG (cand->mode, REGNO (srcreg2));
384   map_dstreg = gen_rtx_REG (cand->mode, REGNO (dstreg));
385   ifexpr = gen_rtx_IF_THEN_ELSE (cand->mode, cond, map_srcreg, map_srcreg2);
386   new_set = gen_rtx_SET (VOIDmode, map_dstreg, ifexpr);
387 
388   if (validate_change (def_insn, &PATTERN (def_insn), new_set, true))
389     {
390       if (dump_file)
391         {
392           fprintf (dump_file,
393 		   "Mode of conditional move instruction extended:\n");
394           print_rtl_single (dump_file, def_insn);
395         }
396       return true;
397     }
398 
399   return false;
400 }
401 
402 /* Get all the reaching definitions of an instruction.  The definitions are
403    desired for REG used in INSN.  Return the definition list or NULL if a
404    definition is missing.  If DEST is non-NULL, additionally push the INSN
405    of the definitions onto DEST.  */
406 
407 static struct df_link *
get_defs(rtx insn,rtx reg,vec<rtx> * dest)408 get_defs (rtx insn, rtx reg, vec<rtx> *dest)
409 {
410   df_ref reg_info, *uses;
411   struct df_link *ref_chain, *ref_link;
412 
413   reg_info = NULL;
414 
415   for (uses = DF_INSN_USES (insn); *uses; uses++)
416     {
417       reg_info = *uses;
418       if (GET_CODE (DF_REF_REG (reg_info)) == SUBREG)
419         return NULL;
420       if (REGNO (DF_REF_REG (reg_info)) == REGNO (reg))
421         break;
422     }
423 
424   gcc_assert (reg_info != NULL && uses != NULL);
425 
426   ref_chain = DF_REF_CHAIN (reg_info);
427 
428   for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
429     {
430       /* Problem getting some definition for this instruction.  */
431       if (ref_link->ref == NULL)
432         return NULL;
433       if (DF_REF_INSN_INFO (ref_link->ref) == NULL)
434         return NULL;
435     }
436 
437   if (dest)
438     for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
439       dest->safe_push (DF_REF_INSN (ref_link->ref));
440 
441   return ref_chain;
442 }
443 
444 /* Return true if INSN is
445      (SET (reg REGNO (def_reg)) (if_then_else (cond) (REG x1) (REG x2)))
446    and store x1 and x2 in REG_1 and REG_2.  */
447 
448 static bool
is_cond_copy_insn(rtx insn,rtx * reg1,rtx * reg2)449 is_cond_copy_insn (rtx insn, rtx *reg1, rtx *reg2)
450 {
451   rtx expr = single_set (insn);
452 
453   if (expr != NULL_RTX
454       && GET_CODE (expr) == SET
455       && GET_CODE (SET_DEST (expr)) == REG
456       && GET_CODE (SET_SRC (expr))  == IF_THEN_ELSE
457       && GET_CODE (XEXP (SET_SRC (expr), 1)) == REG
458       && GET_CODE (XEXP (SET_SRC (expr), 2)) == REG)
459     {
460       *reg1 = XEXP (SET_SRC (expr), 1);
461       *reg2 = XEXP (SET_SRC (expr), 2);
462       return true;
463     }
464 
465   return false;
466 }
467 
468 enum ext_modified_kind
469 {
470   /* The insn hasn't been modified by ree pass yet.  */
471   EXT_MODIFIED_NONE,
472   /* Changed into zero extension.  */
473   EXT_MODIFIED_ZEXT,
474   /* Changed into sign extension.  */
475   EXT_MODIFIED_SEXT
476 };
477 
478 struct ATTRIBUTE_PACKED ext_modified
479 {
480   /* Mode from which ree has zero or sign extended the destination.  */
481   ENUM_BITFIELD(machine_mode) mode : 8;
482 
483   /* Kind of modification of the insn.  */
484   ENUM_BITFIELD(ext_modified_kind) kind : 2;
485 
486   /* True if the insn is scheduled to be deleted.  */
487   unsigned int deleted : 1;
488 };
489 
490 /* Vectors used by combine_reaching_defs and its helpers.  */
491 typedef struct ext_state
492 {
493   /* In order to avoid constant alloc/free, we keep these
494      4 vectors live through the entire find_and_remove_re and just
495      truncate them each time.  */
496   vec<rtx> defs_list;
497   vec<rtx> copies_list;
498   vec<rtx> modified_list;
499   vec<rtx> work_list;
500 
501   /* For instructions that have been successfully modified, this is
502      the original mode from which the insn is extending and
503      kind of extension.  */
504   struct ext_modified *modified;
505 } ext_state;
506 
507 /* Reaching Definitions of the extended register could be conditional copies
508    or regular definitions.  This function separates the two types into two
509    lists, STATE->DEFS_LIST and STATE->COPIES_LIST.  This is necessary because,
510    if a reaching definition is a conditional copy, merging the extension with
511    this definition is wrong.  Conditional copies are merged by transitively
512    merging their definitions.  The defs_list is populated with all the reaching
513    definitions of the extension instruction (EXTEND_INSN) which must be merged
514    with an extension.  The copies_list contains all the conditional moves that
515    will later be extended into a wider mode conditional move if all the merges
516    are successful.  The function returns false upon failure, true upon
517    success.  */
518 
519 static bool
make_defs_and_copies_lists(rtx extend_insn,const_rtx set_pat,ext_state * state)520 make_defs_and_copies_lists (rtx extend_insn, const_rtx set_pat,
521 			    ext_state *state)
522 {
523   rtx src_reg = XEXP (SET_SRC (set_pat), 0);
524   bool *is_insn_visited;
525   bool ret = true;
526 
527   state->work_list.truncate (0);
528 
529   /* Initialize the work list.  */
530   if (!get_defs (extend_insn, src_reg, &state->work_list))
531     gcc_unreachable ();
532 
533   is_insn_visited = XCNEWVEC (bool, max_insn_uid);
534 
535   /* Perform transitive closure for conditional copies.  */
536   while (!state->work_list.is_empty ())
537     {
538       rtx def_insn = state->work_list.pop ();
539       rtx reg1, reg2;
540 
541       gcc_assert (INSN_UID (def_insn) < max_insn_uid);
542 
543       if (is_insn_visited[INSN_UID (def_insn)])
544 	continue;
545       is_insn_visited[INSN_UID (def_insn)] = true;
546 
547       if (is_cond_copy_insn (def_insn, &reg1, &reg2))
548 	{
549 	  /* Push it onto the copy list first.  */
550 	  state->copies_list.safe_push (def_insn);
551 
552 	  /* Now perform the transitive closure.  */
553 	  if (!get_defs (def_insn, reg1, &state->work_list)
554 	      || !get_defs (def_insn, reg2, &state->work_list))
555 	    {
556 	      ret = false;
557 	      break;
558 	    }
559         }
560       else
561 	state->defs_list.safe_push (def_insn);
562     }
563 
564   XDELETEVEC (is_insn_visited);
565 
566   return ret;
567 }
568 
569 /* Merge the DEF_INSN with an extension.  Calls combine_set_extension
570    on the SET pattern.  */
571 
572 static bool
merge_def_and_ext(ext_cand * cand,rtx def_insn,ext_state * state)573 merge_def_and_ext (ext_cand *cand, rtx def_insn, ext_state *state)
574 {
575   enum machine_mode ext_src_mode;
576   enum rtx_code code;
577   rtx *sub_rtx;
578   rtx s_expr;
579   int i;
580 
581   ext_src_mode = GET_MODE (XEXP (SET_SRC (cand->expr), 0));
582   code = GET_CODE (PATTERN (def_insn));
583   sub_rtx = NULL;
584 
585   if (code == PARALLEL)
586     {
587       for (i = 0; i < XVECLEN (PATTERN (def_insn), 0); i++)
588         {
589           s_expr = XVECEXP (PATTERN (def_insn), 0, i);
590           if (GET_CODE (s_expr) != SET)
591             continue;
592 
593           if (sub_rtx == NULL)
594             sub_rtx = &XVECEXP (PATTERN (def_insn), 0, i);
595           else
596             {
597               /* PARALLEL with multiple SETs.  */
598               return false;
599             }
600         }
601     }
602   else if (code == SET)
603     sub_rtx = &PATTERN (def_insn);
604   else
605     {
606       /* It is not a PARALLEL or a SET, what could it be ? */
607       return false;
608     }
609 
610   gcc_assert (sub_rtx != NULL);
611 
612   if (REG_P (SET_DEST (*sub_rtx))
613       && (GET_MODE (SET_DEST (*sub_rtx)) == ext_src_mode
614 	  || ((state->modified[INSN_UID (def_insn)].kind
615 	       == (cand->code == ZERO_EXTEND
616 		   ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT))
617 	      && state->modified[INSN_UID (def_insn)].mode
618 		 == ext_src_mode)))
619     {
620       if (GET_MODE_SIZE (GET_MODE (SET_DEST (*sub_rtx)))
621 	  >= GET_MODE_SIZE (cand->mode))
622 	return true;
623       /* If def_insn is already scheduled to be deleted, don't attempt
624 	 to modify it.  */
625       if (state->modified[INSN_UID (def_insn)].deleted)
626 	return false;
627       if (combine_set_extension (cand, def_insn, sub_rtx))
628 	{
629 	  if (state->modified[INSN_UID (def_insn)].kind == EXT_MODIFIED_NONE)
630 	    state->modified[INSN_UID (def_insn)].mode = ext_src_mode;
631 	  return true;
632 	}
633     }
634 
635   return false;
636 }
637 
638 /* This function goes through all reaching defs of the source
639    of the candidate for elimination (CAND) and tries to combine
640    the extension with the definition instruction.  The changes
641    are made as a group so that even if one definition cannot be
642    merged, all reaching definitions end up not being merged.
643    When a conditional copy is encountered, merging is attempted
644    transitively on its definitions.  It returns true upon success
645    and false upon failure.  */
646 
647 static bool
combine_reaching_defs(ext_cand * cand,const_rtx set_pat,ext_state * state)648 combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
649 {
650   rtx def_insn;
651   bool merge_successful = true;
652   int i;
653   int defs_ix;
654   bool outcome;
655 
656   state->defs_list.truncate (0);
657   state->copies_list.truncate (0);
658 
659   outcome = make_defs_and_copies_lists (cand->insn, set_pat, state);
660 
661   if (!outcome)
662     return false;
663 
664   /* If cand->insn has been already modified, update cand->mode to a wider
665      mode if possible, or punt.  */
666   if (state->modified[INSN_UID (cand->insn)].kind != EXT_MODIFIED_NONE)
667     {
668       enum machine_mode mode;
669       rtx set;
670 
671       if (state->modified[INSN_UID (cand->insn)].kind
672 	  != (cand->code == ZERO_EXTEND
673 	      ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT)
674 	  || state->modified[INSN_UID (cand->insn)].mode != cand->mode
675 	  || (set = single_set (cand->insn)) == NULL_RTX)
676 	return false;
677       mode = GET_MODE (SET_DEST (set));
678       gcc_assert (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (cand->mode));
679       cand->mode = mode;
680     }
681 
682   merge_successful = true;
683 
684   /* Go through the defs vector and try to merge all the definitions
685      in this vector.  */
686   state->modified_list.truncate (0);
687   FOR_EACH_VEC_ELT (state->defs_list, defs_ix, def_insn)
688     {
689       if (merge_def_and_ext (cand, def_insn, state))
690 	state->modified_list.safe_push (def_insn);
691       else
692         {
693           merge_successful = false;
694           break;
695         }
696     }
697 
698   /* Now go through the conditional copies vector and try to merge all
699      the copies in this vector.  */
700   if (merge_successful)
701     {
702       FOR_EACH_VEC_ELT (state->copies_list, i, def_insn)
703         {
704           if (transform_ifelse (cand, def_insn))
705 	    state->modified_list.safe_push (def_insn);
706           else
707             {
708               merge_successful = false;
709               break;
710             }
711         }
712     }
713 
714   if (merge_successful)
715     {
716       /* Commit the changes here if possible
717 	 FIXME: It's an all-or-nothing scenario.  Even if only one definition
718 	 cannot be merged, we entirely give up.  In the future, we should allow
719 	 extensions to be partially eliminated along those paths where the
720 	 definitions could be merged.  */
721       if (apply_change_group ())
722         {
723           if (dump_file)
724             fprintf (dump_file, "All merges were successful.\n");
725 
726 	  FOR_EACH_VEC_ELT (state->modified_list, i, def_insn)
727 	    if (state->modified[INSN_UID (def_insn)].kind == EXT_MODIFIED_NONE)
728 	      state->modified[INSN_UID (def_insn)].kind
729 		= (cand->code == ZERO_EXTEND
730 		   ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT);
731 
732           return true;
733         }
734       else
735         {
736           /* Changes need not be cancelled explicitly as apply_change_group
737              does it.  Print list of definitions in the dump_file for debug
738              purposes.  This extension cannot be deleted.  */
739           if (dump_file)
740             {
741 	      fprintf (dump_file,
742 		       "Merge cancelled, non-mergeable definitions:\n");
743 	      FOR_EACH_VEC_ELT (state->modified_list, i, def_insn)
744 	        print_rtl_single (dump_file, def_insn);
745             }
746         }
747     }
748   else
749     {
750       /* Cancel any changes that have been made so far.  */
751       cancel_changes (0);
752     }
753 
754   return false;
755 }
756 
757 /* Add an extension pattern that could be eliminated.  */
758 
759 static void
add_removable_extension(const_rtx expr,rtx insn,vec<ext_cand> * insn_list,unsigned * def_map)760 add_removable_extension (const_rtx expr, rtx insn,
761 			 vec<ext_cand> *insn_list,
762 			 unsigned *def_map)
763 {
764   enum rtx_code code;
765   enum machine_mode mode;
766   unsigned int idx;
767   rtx src, dest;
768 
769   /* We are looking for SET (REG N) (ANY_EXTEND (REG N)).  */
770   if (GET_CODE (expr) != SET)
771     return;
772 
773   src = SET_SRC (expr);
774   code = GET_CODE (src);
775   dest = SET_DEST (expr);
776   mode = GET_MODE (dest);
777 
778   if (REG_P (dest)
779       && (code == SIGN_EXTEND || code == ZERO_EXTEND)
780       && REG_P (XEXP (src, 0))
781       && REGNO (dest) == REGNO (XEXP (src, 0)))
782     {
783       struct df_link *defs, *def;
784       ext_cand *cand;
785 
786       /* First, make sure we can get all the reaching definitions.  */
787       defs = get_defs (insn, XEXP (src, 0), NULL);
788       if (!defs)
789 	{
790 	  if (dump_file)
791 	    {
792 	      fprintf (dump_file, "Cannot eliminate extension:\n");
793 	      print_rtl_single (dump_file, insn);
794 	      fprintf (dump_file, " because of missing definition(s)\n");
795 	    }
796 	  return;
797 	}
798 
799       /* Second, make sure the reaching definitions don't feed another and
800 	 different extension.  FIXME: this obviously can be improved.  */
801       for (def = defs; def; def = def->next)
802 	if ((idx = def_map[INSN_UID(DF_REF_INSN (def->ref))])
803 	    && (cand = &(*insn_list)[idx - 1])
804 	    && cand->code != code)
805 	  {
806 	    if (dump_file)
807 	      {
808 	        fprintf (dump_file, "Cannot eliminate extension:\n");
809 		print_rtl_single (dump_file, insn);
810 	        fprintf (dump_file, " because of other extension\n");
811 	      }
812 	    return;
813 	  }
814 
815       /* Then add the candidate to the list and insert the reaching definitions
816          into the definition map.  */
817       ext_cand e = {expr, code, mode, insn};
818       insn_list->safe_push (e);
819       idx = insn_list->length ();
820 
821       for (def = defs; def; def = def->next)
822 	def_map[INSN_UID(DF_REF_INSN (def->ref))] = idx;
823     }
824 }
825 
826 /* Traverse the instruction stream looking for extensions and return the
827    list of candidates.  */
828 
829 static vec<ext_cand>
find_removable_extensions(void)830 find_removable_extensions (void)
831 {
832   vec<ext_cand> insn_list = vNULL;
833   basic_block bb;
834   rtx insn, set;
835   unsigned *def_map = XCNEWVEC (unsigned, max_insn_uid);
836 
837   FOR_EACH_BB (bb)
838     FOR_BB_INSNS (bb, insn)
839       {
840 	if (!NONDEBUG_INSN_P (insn))
841 	  continue;
842 
843 	set = single_set (insn);
844 	if (set == NULL_RTX)
845 	  continue;
846 	add_removable_extension (set, insn, &insn_list, def_map);
847       }
848 
849   XDELETEVEC (def_map);
850 
851   return insn_list;
852 }
853 
854 /* This is the main function that checks the insn stream for redundant
855    extensions and tries to remove them if possible.  */
856 
857 static void
find_and_remove_re(void)858 find_and_remove_re (void)
859 {
860   ext_cand *curr_cand;
861   rtx curr_insn = NULL_RTX;
862   int num_re_opportunities = 0, num_realized = 0, i;
863   vec<ext_cand> reinsn_list;
864   vec<rtx> reinsn_del_list;
865   ext_state state;
866 
867   /* Construct DU chain to get all reaching definitions of each
868      extension instruction.  */
869   df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
870   df_chain_add_problem (DF_UD_CHAIN + DF_DU_CHAIN);
871   df_analyze ();
872   df_set_flags (DF_DEFER_INSN_RESCAN);
873 
874   max_insn_uid = get_max_uid ();
875   reinsn_del_list.create (0);
876   reinsn_list = find_removable_extensions ();
877   state.defs_list.create (0);
878   state.copies_list.create (0);
879   state.modified_list.create (0);
880   state.work_list.create (0);
881   if (reinsn_list.is_empty ())
882     state.modified = NULL;
883   else
884     state.modified = XCNEWVEC (struct ext_modified, max_insn_uid);
885 
886   FOR_EACH_VEC_ELT (reinsn_list, i, curr_cand)
887     {
888       num_re_opportunities++;
889 
890       /* Try to combine the extension with the definition.  */
891       if (dump_file)
892         {
893           fprintf (dump_file, "Trying to eliminate extension:\n");
894           print_rtl_single (dump_file, curr_cand->insn);
895         }
896 
897       if (combine_reaching_defs (curr_cand, curr_cand->expr, &state))
898         {
899           if (dump_file)
900             fprintf (dump_file, "Eliminated the extension.\n");
901           num_realized++;
902           reinsn_del_list.safe_push (curr_cand->insn);
903 	  state.modified[INSN_UID (curr_cand->insn)].deleted = 1;
904         }
905     }
906 
907   /* Delete all useless extensions here in one sweep.  */
908   FOR_EACH_VEC_ELT (reinsn_del_list, i, curr_insn)
909     delete_insn (curr_insn);
910 
911   reinsn_list.release ();
912   reinsn_del_list.release ();
913   state.defs_list.release ();
914   state.copies_list.release ();
915   state.modified_list.release ();
916   state.work_list.release ();
917   XDELETEVEC (state.modified);
918 
919   if (dump_file && num_re_opportunities > 0)
920     fprintf (dump_file, "Elimination opportunities = %d realized = %d\n",
921 	     num_re_opportunities, num_realized);
922 
923   df_finish_pass (false);
924 }
925 
926 /* Find and remove redundant extensions.  */
927 
928 static unsigned int
rest_of_handle_ree(void)929 rest_of_handle_ree (void)
930 {
931   timevar_push (TV_REE);
932   find_and_remove_re ();
933   timevar_pop (TV_REE);
934   return 0;
935 }
936 
937 /* Run REE pass when flag_ree is set at optimization level > 0.  */
938 
939 static bool
gate_handle_ree(void)940 gate_handle_ree (void)
941 {
942   return (optimize > 0 && flag_ree);
943 }
944 
945 struct rtl_opt_pass pass_ree =
946 {
947  {
948   RTL_PASS,
949   "ree",                                /* name */
950   OPTGROUP_NONE,                        /* optinfo_flags */
951   gate_handle_ree,                      /* gate */
952   rest_of_handle_ree,                   /* execute */
953   NULL,                                 /* sub */
954   NULL,                                 /* next */
955   0,                                    /* static_pass_number */
956   TV_REE,                               /* tv_id */
957   0,                                    /* properties_required */
958   0,                                    /* properties_provided */
959   0,                                    /* properties_destroyed */
960   0,                                    /* todo_flags_start */
961   TODO_ggc_collect |
962   TODO_verify_rtl_sharing,              /* todo_flags_finish */
963  }
964 };
965