1 /* Redundant Extension Elimination pass for the GNU compiler.
2    Copyright (C) 2010-2014 Free Software Foundation, Inc.
3    Contributed by Ilya Enkovich (ilya.enkovich@intel.com)
4 
5    Based on the Redundant Zero-extension elimination pass contributed by
6    Sriraman Tallam (tmsriram@google.com) and Silvius Rus (rus@google.com).
7 
8 This file is part of GCC.
9 
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
14 
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
18 for more details.
19 
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3.  If not see
22 <http://www.gnu.org/licenses/>.  */
23 
24 
25 /* Problem Description :
26    --------------------
27    This pass is intended to remove redundant extension instructions.
28    Such instructions appear for different reasons.  We expect some of
29    them due to implicit zero-extension in 64-bit registers after writing
30    to their lower 32-bit half (e.g. for the x86-64 architecture).
31    Another possible reason is a type cast which follows a load (for
32    instance a register restore) and which can be combined into a single
33    instruction, and for which earlier local passes, e.g. the combiner,
34    weren't able to optimize.
35 
36    How does this pass work  ?
37    --------------------------
38 
39    This pass is run after register allocation.  Hence, all registers that
40    this pass deals with are hard registers.  This pass first looks for an
41    extension instruction that could possibly be redundant.  Such extension
42    instructions show up in RTL with the pattern  :
43    (set (reg:<SWI248> x) (any_extend:<SWI248> (reg:<SWI124> x))),
44    where x can be any hard register.
45    Now, this pass tries to eliminate this instruction by merging the
46    extension with the definitions of register x.  For instance, if
47    one of the definitions of register x was  :
48    (set (reg:SI x) (plus:SI (reg:SI z1) (reg:SI z2))),
49    followed by extension  :
50    (set (reg:DI x) (zero_extend:DI (reg:SI x)))
51    then the combination converts this into :
52    (set (reg:DI x) (zero_extend:DI (plus:SI (reg:SI z1) (reg:SI z2)))).
53    If all the merged definitions are recognizable assembly instructions,
54    the extension is effectively eliminated.
55 
56    For example, for the x86-64 architecture, implicit zero-extensions
57    are captured with appropriate patterns in the i386.md file.  Hence,
58    these merged definition can be matched to a single assembly instruction.
59    The original extension instruction is then deleted if all the
60    definitions can be merged.
61 
62    However, there are cases where the definition instruction cannot be
63    merged with an extension.  Examples are CALL instructions.  In such
64    cases, the original extension is not redundant and this pass does
65    not delete it.
66 
67    Handling conditional moves :
68    ----------------------------
69 
70    Architectures like x86-64 support conditional moves whose semantics for
71    extension differ from the other instructions.  For instance, the
72    instruction *cmov ebx, eax*
73    zero-extends eax onto rax only when the move from ebx to eax happens.
74    Otherwise, eax may not be zero-extended.  Consider conditional moves as
75    RTL instructions of the form
76    (set (reg:SI x) (if_then_else (cond) (reg:SI y) (reg:SI z))).
77    This pass tries to merge an extension with a conditional move by
78    actually merging the definitions of y and z with an extension and then
79    converting the conditional move into :
80    (set (reg:DI x) (if_then_else (cond) (reg:DI y) (reg:DI z))).
81    Since registers y and z are extended, register x will also be extended
82    after the conditional move.  Note that this step has to be done
83    transitively since the definition of a conditional copy can be
84    another conditional copy.
85 
86    Motivating Example I :
87    ---------------------
88    For this program :
89    **********************************************
90    bad_code.c
91 
92    int mask[1000];
93 
94    int foo(unsigned x)
95    {
96      if (x < 10)
97        x = x * 45;
98      else
99        x = x * 78;
100      return mask[x];
101    }
102    **********************************************
103 
104    $ gcc -O2 bad_code.c
105      ........
106      400315:       b8 4e 00 00 00          mov    $0x4e,%eax
107      40031a:       0f af f8                imul   %eax,%edi
108      40031d:       89 ff                   mov    %edi,%edi - useless extension
109      40031f:       8b 04 bd 60 19 40 00    mov    0x401960(,%rdi,4),%eax
110      400326:       c3                      retq
111      ......
112      400330:       ba 2d 00 00 00          mov    $0x2d,%edx
113      400335:       0f af fa                imul   %edx,%edi
114      400338:       89 ff                   mov    %edi,%edi - useless extension
115      40033a:       8b 04 bd 60 19 40 00    mov    0x401960(,%rdi,4),%eax
116      400341:       c3                      retq
117 
118    $ gcc -O2 -free bad_code.c
119      ......
120      400315:       6b ff 4e                imul   $0x4e,%edi,%edi
121      400318:       8b 04 bd 40 19 40 00    mov    0x401940(,%rdi,4),%eax
122      40031f:       c3                      retq
123      400320:       6b ff 2d                imul   $0x2d,%edi,%edi
124      400323:       8b 04 bd 40 19 40 00    mov    0x401940(,%rdi,4),%eax
125      40032a:       c3                      retq
126 
127    Motivating Example II :
128    ---------------------
129 
130    Here is an example with a conditional move.
131 
132    For this program :
133    **********************************************
134 
135    unsigned long long foo(unsigned x , unsigned y)
136    {
137      unsigned z;
138      if (x > 100)
139        z = x + y;
140      else
141        z = x - y;
142      return (unsigned long long)(z);
143    }
144 
145    $ gcc -O2 bad_code.c
146      ............
147      400360:       8d 14 3e                lea    (%rsi,%rdi,1),%edx
148      400363:       89 f8                   mov    %edi,%eax
149      400365:       29 f0                   sub    %esi,%eax
150      400367:       83 ff 65                cmp    $0x65,%edi
151      40036a:       0f 43 c2                cmovae %edx,%eax
152      40036d:       89 c0                   mov    %eax,%eax - useless extension
153      40036f:       c3                      retq
154 
155    $ gcc -O2 -free bad_code.c
156      .............
157      400360:       89 fa                   mov    %edi,%edx
158      400362:       8d 04 3e                lea    (%rsi,%rdi,1),%eax
159      400365:       29 f2                   sub    %esi,%edx
160      400367:       83 ff 65                cmp    $0x65,%edi
161      40036a:       89 d6                   mov    %edx,%esi
162      40036c:       48 0f 42 c6             cmovb  %rsi,%rax
163      400370:       c3                      retq
164 
165   Motivating Example III :
166   ---------------------
167 
168   Here is an example with a type cast.
169 
170   For this program :
171   **********************************************
172 
173   void test(int size, unsigned char *in, unsigned char *out)
174   {
175     int i;
176     unsigned char xr, xg, xy=0;
177 
178     for (i = 0; i < size; i++) {
179       xr = *in++;
180       xg = *in++;
181       xy = (unsigned char) ((19595*xr + 38470*xg) >> 16);
182       *out++ = xy;
183     }
184   }
185 
186   $ gcc -O2 bad_code.c
187     ............
188     10:   0f b6 0e                movzbl (%rsi),%ecx
189     13:   0f b6 46 01             movzbl 0x1(%rsi),%eax
190     17:   48 83 c6 02             add    $0x2,%rsi
191     1b:   0f b6 c9                movzbl %cl,%ecx - useless extension
192     1e:   0f b6 c0                movzbl %al,%eax - useless extension
193     21:   69 c9 8b 4c 00 00       imul   $0x4c8b,%ecx,%ecx
194     27:   69 c0 46 96 00 00       imul   $0x9646,%eax,%eax
195 
196    $ gcc -O2 -free bad_code.c
197      .............
198     10:   0f b6 0e                movzbl (%rsi),%ecx
199     13:   0f b6 46 01             movzbl 0x1(%rsi),%eax
200     17:   48 83 c6 02             add    $0x2,%rsi
201     1b:   69 c9 8b 4c 00 00       imul   $0x4c8b,%ecx,%ecx
202     21:   69 c0 46 96 00 00       imul   $0x9646,%eax,%eax
203 
204    Usefulness :
205    ----------
206 
207    The original redundant zero-extension elimination pass reported reduction
208    of the dynamic instruction count of a compression benchmark by 2.8% and
209    improvement of its run time by about 1%.
210 
211    The additional performance gain with the enhanced pass is mostly expected
212    on in-order architectures where redundancy cannot be compensated by out of
213    order execution.  Measurements showed up to 10% performance gain (reduced
214    run time) on EEMBC 2.0 benchmarks on Atom processor with geomean performance
215    gain 1%.  */
216 
217 
218 #include "config.h"
219 #include "system.h"
220 #include "coretypes.h"
221 #include "tm.h"
222 #include "rtl.h"
223 #include "tree.h"
224 #include "tm_p.h"
225 #include "flags.h"
226 #include "regs.h"
227 #include "hard-reg-set.h"
228 #include "basic-block.h"
229 #include "insn-config.h"
230 #include "function.h"
231 #include "expr.h"
232 #include "insn-attr.h"
233 #include "recog.h"
234 #include "diagnostic-core.h"
235 #include "target.h"
236 #include "optabs.h"
237 #include "insn-codes.h"
238 #include "rtlhooks-def.h"
239 #include "params.h"
240 #include "tree-pass.h"
241 #include "df.h"
242 #include "cgraph.h"
243 
244 /* This structure represents a candidate for elimination.  */
245 
246 typedef struct ext_cand
247 {
248   /* The expression.  */
249   const_rtx expr;
250 
251   /* The kind of extension.  */
252   enum rtx_code code;
253 
254   /* The destination mode.  */
255   enum machine_mode mode;
256 
257   /* The instruction where it lives.  */
258   rtx insn;
259 } ext_cand;
260 
261 
262 static int max_insn_uid;
263 
264 /* Given a insn (CURR_INSN), an extension candidate for removal (CAND)
265    and a pointer to the SET rtx (ORIG_SET) that needs to be modified,
266    this code modifies the SET rtx to a new SET rtx that extends the
267    right hand expression into a register on the left hand side.  Note
268    that multiple assumptions are made about the nature of the set that
269    needs to be true for this to work and is called from merge_def_and_ext.
270 
271    Original :
272    (set (reg a) (expression))
273 
274    Transform :
275    (set (reg a) (any_extend (expression)))
276 
277    Special Cases :
278    If the expression is a constant or another extension, then directly
279    assign it to the register.  */
280 
281 static bool
combine_set_extension(ext_cand * cand,rtx curr_insn,rtx * orig_set)282 combine_set_extension (ext_cand *cand, rtx curr_insn, rtx *orig_set)
283 {
284   rtx orig_src = SET_SRC (*orig_set);
285   rtx new_set;
286   rtx cand_pat = PATTERN (cand->insn);
287 
288   /* If the extension's source/destination registers are not the same
289      then we need to change the original load to reference the destination
290      of the extension.  Then we need to emit a copy from that destination
291      to the original destination of the load.  */
292   rtx new_reg;
293   bool copy_needed
294     = (REGNO (SET_DEST (cand_pat)) != REGNO (XEXP (SET_SRC (cand_pat), 0)));
295   if (copy_needed)
296     new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (cand_pat)));
297   else
298     new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (*orig_set)));
299 
300 #if 0
301   /* Rethinking test.  Temporarily disabled.  */
302   /* We're going to be widening the result of DEF_INSN, ensure that doing so
303      doesn't change the number of hard registers needed for the result.  */
304   if (HARD_REGNO_NREGS (REGNO (new_reg), cand->mode)
305       != HARD_REGNO_NREGS (REGNO (SET_DEST (*orig_set)),
306 			   GET_MODE (SET_DEST (*orig_set))))
307 	return false;
308 #endif
309 
310   /* Merge constants by directly moving the constant into the register under
311      some conditions.  Recall that RTL constants are sign-extended.  */
312   if (GET_CODE (orig_src) == CONST_INT
313       && HOST_BITS_PER_WIDE_INT >= GET_MODE_BITSIZE (cand->mode))
314     {
315       if (INTVAL (orig_src) >= 0 || cand->code == SIGN_EXTEND)
316 	new_set = gen_rtx_SET (VOIDmode, new_reg, orig_src);
317       else
318 	{
319 	  /* Zero-extend the negative constant by masking out the bits outside
320 	     the source mode.  */
321 	  enum machine_mode src_mode = GET_MODE (SET_DEST (*orig_set));
322 	  rtx new_const_int
323 	    = gen_int_mode (INTVAL (orig_src) & GET_MODE_MASK (src_mode),
324 			    GET_MODE (new_reg));
325 	  new_set = gen_rtx_SET (VOIDmode, new_reg, new_const_int);
326 	}
327     }
328   else if (GET_MODE (orig_src) == VOIDmode)
329     {
330       /* This is mostly due to a call insn that should not be optimized.  */
331       return false;
332     }
333   else if (GET_CODE (orig_src) == cand->code)
334     {
335       /* Here is a sequence of two extensions.  Try to merge them.  */
336       rtx temp_extension
337 	= gen_rtx_fmt_e (cand->code, cand->mode, XEXP (orig_src, 0));
338       rtx simplified_temp_extension = simplify_rtx (temp_extension);
339       if (simplified_temp_extension)
340         temp_extension = simplified_temp_extension;
341       new_set = gen_rtx_SET (VOIDmode, new_reg, temp_extension);
342     }
343   else if (GET_CODE (orig_src) == IF_THEN_ELSE)
344     {
345       /* Only IF_THEN_ELSE of phi-type copies are combined.  Otherwise,
346          in general, IF_THEN_ELSE should not be combined.  */
347       return false;
348     }
349   else
350     {
351       /* This is the normal case.  */
352       rtx temp_extension
353 	= gen_rtx_fmt_e (cand->code, cand->mode, orig_src);
354       rtx simplified_temp_extension = simplify_rtx (temp_extension);
355       if (simplified_temp_extension)
356         temp_extension = simplified_temp_extension;
357       new_set = gen_rtx_SET (VOIDmode, new_reg, temp_extension);
358     }
359 
360   /* This change is a part of a group of changes.  Hence,
361      validate_change will not try to commit the change.  */
362   if (validate_change (curr_insn, orig_set, new_set, true))
363     {
364       if (dump_file)
365         {
366           fprintf (dump_file,
367 		   "Tentatively merged extension with definition %s:\n",
368 		   (copy_needed) ? "(copy needed)" : "");
369           print_rtl_single (dump_file, curr_insn);
370         }
371       return true;
372     }
373 
374   return false;
375 }
376 
377 /* Treat if_then_else insns, where the operands of both branches
378    are registers, as copies.  For instance,
379    Original :
380    (set (reg:SI a) (if_then_else (cond) (reg:SI b) (reg:SI c)))
381    Transformed :
382    (set (reg:DI a) (if_then_else (cond) (reg:DI b) (reg:DI c)))
383    DEF_INSN is the if_then_else insn.  */
384 
385 static bool
transform_ifelse(ext_cand * cand,rtx def_insn)386 transform_ifelse (ext_cand *cand, rtx def_insn)
387 {
388   rtx set_insn = PATTERN (def_insn);
389   rtx srcreg, dstreg, srcreg2;
390   rtx map_srcreg, map_dstreg, map_srcreg2;
391   rtx ifexpr;
392   rtx cond;
393   rtx new_set;
394 
395   gcc_assert (GET_CODE (set_insn) == SET);
396 
397   cond = XEXP (SET_SRC (set_insn), 0);
398   dstreg = SET_DEST (set_insn);
399   srcreg = XEXP (SET_SRC (set_insn), 1);
400   srcreg2 = XEXP (SET_SRC (set_insn), 2);
401   /* If the conditional move already has the right or wider mode,
402      there is nothing to do.  */
403   if (GET_MODE_SIZE (GET_MODE (dstreg)) >= GET_MODE_SIZE (cand->mode))
404     return true;
405 
406   map_srcreg = gen_rtx_REG (cand->mode, REGNO (srcreg));
407   map_srcreg2 = gen_rtx_REG (cand->mode, REGNO (srcreg2));
408   map_dstreg = gen_rtx_REG (cand->mode, REGNO (dstreg));
409   ifexpr = gen_rtx_IF_THEN_ELSE (cand->mode, cond, map_srcreg, map_srcreg2);
410   new_set = gen_rtx_SET (VOIDmode, map_dstreg, ifexpr);
411 
412   if (validate_change (def_insn, &PATTERN (def_insn), new_set, true))
413     {
414       if (dump_file)
415         {
416           fprintf (dump_file,
417 		   "Mode of conditional move instruction extended:\n");
418           print_rtl_single (dump_file, def_insn);
419         }
420       return true;
421     }
422 
423   return false;
424 }
425 
426 /* Get all the reaching definitions of an instruction.  The definitions are
427    desired for REG used in INSN.  Return the definition list or NULL if a
428    definition is missing.  If DEST is non-NULL, additionally push the INSN
429    of the definitions onto DEST.  */
430 
431 static struct df_link *
get_defs(rtx insn,rtx reg,vec<rtx> * dest)432 get_defs (rtx insn, rtx reg, vec<rtx> *dest)
433 {
434   df_ref reg_info, *uses;
435   struct df_link *ref_chain, *ref_link;
436 
437   reg_info = NULL;
438 
439   for (uses = DF_INSN_USES (insn); *uses; uses++)
440     {
441       reg_info = *uses;
442       if (GET_CODE (DF_REF_REG (reg_info)) == SUBREG)
443         return NULL;
444       if (REGNO (DF_REF_REG (reg_info)) == REGNO (reg))
445         break;
446     }
447 
448   gcc_assert (reg_info != NULL && uses != NULL);
449 
450   ref_chain = DF_REF_CHAIN (reg_info);
451 
452   for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
453     {
454       /* Problem getting some definition for this instruction.  */
455       if (ref_link->ref == NULL)
456         return NULL;
457       if (DF_REF_INSN_INFO (ref_link->ref) == NULL)
458         return NULL;
459     }
460 
461   if (dest)
462     for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
463       dest->safe_push (DF_REF_INSN (ref_link->ref));
464 
465   return ref_chain;
466 }
467 
468 /* Return true if INSN is
469      (SET (reg REGNO (def_reg)) (if_then_else (cond) (REG x1) (REG x2)))
470    and store x1 and x2 in REG_1 and REG_2.  */
471 
472 static bool
is_cond_copy_insn(rtx insn,rtx * reg1,rtx * reg2)473 is_cond_copy_insn (rtx insn, rtx *reg1, rtx *reg2)
474 {
475   rtx expr = single_set (insn);
476 
477   if (expr != NULL_RTX
478       && GET_CODE (expr) == SET
479       && GET_CODE (SET_DEST (expr)) == REG
480       && GET_CODE (SET_SRC (expr))  == IF_THEN_ELSE
481       && GET_CODE (XEXP (SET_SRC (expr), 1)) == REG
482       && GET_CODE (XEXP (SET_SRC (expr), 2)) == REG)
483     {
484       *reg1 = XEXP (SET_SRC (expr), 1);
485       *reg2 = XEXP (SET_SRC (expr), 2);
486       return true;
487     }
488 
489   return false;
490 }
491 
492 enum ext_modified_kind
493 {
494   /* The insn hasn't been modified by ree pass yet.  */
495   EXT_MODIFIED_NONE,
496   /* Changed into zero extension.  */
497   EXT_MODIFIED_ZEXT,
498   /* Changed into sign extension.  */
499   EXT_MODIFIED_SEXT
500 };
501 
502 struct ATTRIBUTE_PACKED ext_modified
503 {
504   /* Mode from which ree has zero or sign extended the destination.  */
505   ENUM_BITFIELD(machine_mode) mode : 8;
506 
507   /* Kind of modification of the insn.  */
508   ENUM_BITFIELD(ext_modified_kind) kind : 2;
509 
510   unsigned int do_not_reextend : 1;
511 
512   /* True if the insn is scheduled to be deleted.  */
513   unsigned int deleted : 1;
514 };
515 
516 /* Vectors used by combine_reaching_defs and its helpers.  */
517 typedef struct ext_state
518 {
519   /* In order to avoid constant alloc/free, we keep these
520      4 vectors live through the entire find_and_remove_re and just
521      truncate them each time.  */
522   vec<rtx> defs_list;
523   vec<rtx> copies_list;
524   vec<rtx> modified_list;
525   vec<rtx> work_list;
526 
527   /* For instructions that have been successfully modified, this is
528      the original mode from which the insn is extending and
529      kind of extension.  */
530   struct ext_modified *modified;
531 } ext_state;
532 
533 /* Reaching Definitions of the extended register could be conditional copies
534    or regular definitions.  This function separates the two types into two
535    lists, STATE->DEFS_LIST and STATE->COPIES_LIST.  This is necessary because,
536    if a reaching definition is a conditional copy, merging the extension with
537    this definition is wrong.  Conditional copies are merged by transitively
538    merging their definitions.  The defs_list is populated with all the reaching
539    definitions of the extension instruction (EXTEND_INSN) which must be merged
540    with an extension.  The copies_list contains all the conditional moves that
541    will later be extended into a wider mode conditional move if all the merges
542    are successful.  The function returns false upon failure, true upon
543    success.  */
544 
545 static bool
make_defs_and_copies_lists(rtx extend_insn,const_rtx set_pat,ext_state * state)546 make_defs_and_copies_lists (rtx extend_insn, const_rtx set_pat,
547 			    ext_state *state)
548 {
549   rtx src_reg = XEXP (SET_SRC (set_pat), 0);
550   bool *is_insn_visited;
551   bool ret = true;
552 
553   state->work_list.truncate (0);
554 
555   /* Initialize the work list.  */
556   if (!get_defs (extend_insn, src_reg, &state->work_list))
557     gcc_unreachable ();
558 
559   is_insn_visited = XCNEWVEC (bool, max_insn_uid);
560 
561   /* Perform transitive closure for conditional copies.  */
562   while (!state->work_list.is_empty ())
563     {
564       rtx def_insn = state->work_list.pop ();
565       rtx reg1, reg2;
566 
567       gcc_assert (INSN_UID (def_insn) < max_insn_uid);
568 
569       if (is_insn_visited[INSN_UID (def_insn)])
570 	continue;
571       is_insn_visited[INSN_UID (def_insn)] = true;
572 
573       if (is_cond_copy_insn (def_insn, &reg1, &reg2))
574 	{
575 	  /* Push it onto the copy list first.  */
576 	  state->copies_list.safe_push (def_insn);
577 
578 	  /* Now perform the transitive closure.  */
579 	  if (!get_defs (def_insn, reg1, &state->work_list)
580 	      || !get_defs (def_insn, reg2, &state->work_list))
581 	    {
582 	      ret = false;
583 	      break;
584 	    }
585         }
586       else
587 	state->defs_list.safe_push (def_insn);
588     }
589 
590   XDELETEVEC (is_insn_visited);
591 
592   return ret;
593 }
594 
595 /* If DEF_INSN has single SET expression, possibly buried inside
596    a PARALLEL, return the address of the SET expression, else
597    return NULL.  This is similar to single_set, except that
598    single_set allows multiple SETs when all but one is dead.  */
599 static rtx *
get_sub_rtx(rtx def_insn)600 get_sub_rtx (rtx def_insn)
601 {
602   enum rtx_code code = GET_CODE (PATTERN (def_insn));
603   rtx *sub_rtx = NULL;
604 
605   if (code == PARALLEL)
606     {
607       for (int i = 0; i < XVECLEN (PATTERN (def_insn), 0); i++)
608         {
609           rtx s_expr = XVECEXP (PATTERN (def_insn), 0, i);
610           if (GET_CODE (s_expr) != SET)
611             continue;
612 
613           if (sub_rtx == NULL)
614             sub_rtx = &XVECEXP (PATTERN (def_insn), 0, i);
615           else
616             {
617               /* PARALLEL with multiple SETs.  */
618               return NULL;
619             }
620         }
621     }
622   else if (code == SET)
623     sub_rtx = &PATTERN (def_insn);
624   else
625     {
626       /* It is not a PARALLEL or a SET, what could it be ? */
627       return NULL;
628     }
629 
630   gcc_assert (sub_rtx != NULL);
631   return sub_rtx;
632 }
633 
634 /* Merge the DEF_INSN with an extension.  Calls combine_set_extension
635    on the SET pattern.  */
636 
637 static bool
merge_def_and_ext(ext_cand * cand,rtx def_insn,ext_state * state)638 merge_def_and_ext (ext_cand *cand, rtx def_insn, ext_state *state)
639 {
640   enum machine_mode ext_src_mode;
641   rtx *sub_rtx;
642 
643   ext_src_mode = GET_MODE (XEXP (SET_SRC (cand->expr), 0));
644   sub_rtx = get_sub_rtx (def_insn);
645 
646   if (sub_rtx == NULL)
647     return false;
648 
649   if (REG_P (SET_DEST (*sub_rtx))
650       && (GET_MODE (SET_DEST (*sub_rtx)) == ext_src_mode
651 	  || ((state->modified[INSN_UID (def_insn)].kind
652 	       == (cand->code == ZERO_EXTEND
653 		   ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT))
654 	      && state->modified[INSN_UID (def_insn)].mode
655 		 == ext_src_mode)))
656     {
657       if (GET_MODE_SIZE (GET_MODE (SET_DEST (*sub_rtx)))
658 	  >= GET_MODE_SIZE (cand->mode))
659 	return true;
660       /* If def_insn is already scheduled to be deleted, don't attempt
661 	 to modify it.  */
662       if (state->modified[INSN_UID (def_insn)].deleted)
663 	return false;
664       if (combine_set_extension (cand, def_insn, sub_rtx))
665 	{
666 	  if (state->modified[INSN_UID (def_insn)].kind == EXT_MODIFIED_NONE)
667 	    state->modified[INSN_UID (def_insn)].mode = ext_src_mode;
668 	  return true;
669 	}
670     }
671 
672   return false;
673 }
674 
675 /* Given SRC, which should be one or more extensions of a REG, strip
676    away the extensions and return the REG.  */
677 
678 static inline rtx
get_extended_src_reg(rtx src)679 get_extended_src_reg (rtx src)
680 {
681   while (GET_CODE (src) == SIGN_EXTEND || GET_CODE (src) == ZERO_EXTEND)
682     src = XEXP (src, 0);
683   gcc_assert (REG_P (src));
684   return src;
685 }
686 
687 /* This function goes through all reaching defs of the source
688    of the candidate for elimination (CAND) and tries to combine
689    the extension with the definition instruction.  The changes
690    are made as a group so that even if one definition cannot be
691    merged, all reaching definitions end up not being merged.
692    When a conditional copy is encountered, merging is attempted
693    transitively on its definitions.  It returns true upon success
694    and false upon failure.  */
695 
696 static bool
combine_reaching_defs(ext_cand * cand,const_rtx set_pat,ext_state * state)697 combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
698 {
699   rtx def_insn;
700   bool merge_successful = true;
701   int i;
702   int defs_ix;
703   bool outcome;
704 
705   state->defs_list.truncate (0);
706   state->copies_list.truncate (0);
707 
708   outcome = make_defs_and_copies_lists (cand->insn, set_pat, state);
709 
710   if (!outcome)
711     return false;
712 
713   /* If the destination operand of the extension is a different
714      register than the source operand, then additional restrictions
715      are needed.  Note we have to handle cases where we have nested
716      extensions in the source operand.  */
717   bool copy_needed
718     = (REGNO (SET_DEST (PATTERN (cand->insn)))
719        != REGNO (get_extended_src_reg (SET_SRC (PATTERN (cand->insn)))));
720   if (copy_needed)
721     {
722       /* In theory we could handle more than one reaching def, it
723 	 just makes the code to update the insn stream more complex.  */
724       if (state->defs_list.length () != 1)
725 	return false;
726 
727       /* We require the candidate not already be modified.  It may,
728 	 for example have been changed from a (sign_extend (reg))
729 	 into (zero_extend (sign_extend (reg))).
730 
731 	 Handling that case shouldn't be terribly difficult, but the code
732 	 here and the code to emit copies would need auditing.  Until
733 	 we see a need, this is the safe thing to do.  */
734       if (state->modified[INSN_UID (cand->insn)].kind != EXT_MODIFIED_NONE)
735 	return false;
736 
737       /* Transformation of
738 	 (set (reg1) (expression))
739 	 (set (reg2) (any_extend (reg1)))
740 	 into
741 	 (set (reg2) (any_extend (expression)))
742 	 (set (reg1) (reg2))
743 	 is only valid for scalar integral modes, as it relies on the low
744 	 subreg of reg1 to have the value of (expression), which is not true
745 	 e.g. for vector modes.  */
746       if (!SCALAR_INT_MODE_P (GET_MODE (SET_DEST (PATTERN (cand->insn)))))
747 	return false;
748 
749       /* There's only one reaching def.  */
750       rtx def_insn = state->defs_list[0];
751 
752       /* The defining statement must not have been modified either.  */
753       if (state->modified[INSN_UID (def_insn)].kind != EXT_MODIFIED_NONE)
754 	return false;
755 
756       /* The defining statement and candidate insn must be in the same block.
757 	 This is merely to keep the test for safety and updating the insn
758 	 stream simple.  Also ensure that within the block the candidate
759 	 follows the defining insn.  */
760       if (BLOCK_FOR_INSN (cand->insn) != BLOCK_FOR_INSN (def_insn)
761 	  || DF_INSN_LUID (def_insn) > DF_INSN_LUID (cand->insn))
762 	return false;
763 
764       /* If there is an overlap between the destination of DEF_INSN and
765 	 CAND->insn, then this transformation is not safe.  Note we have
766 	 to test in the widened mode.  */
767       rtx *dest_sub_rtx = get_sub_rtx (def_insn);
768       if (dest_sub_rtx == NULL
769 	  || !REG_P (SET_DEST (*dest_sub_rtx)))
770 	return false;
771 
772       rtx tmp_reg = gen_rtx_REG (GET_MODE (SET_DEST (PATTERN (cand->insn))),
773 				 REGNO (SET_DEST (*dest_sub_rtx)));
774       if (reg_overlap_mentioned_p (tmp_reg, SET_DEST (PATTERN (cand->insn))))
775 	return false;
776 
777       /* The destination register of the extension insn must not be
778 	 used or set between the def_insn and cand->insn exclusive.  */
779       if (reg_used_between_p (SET_DEST (PATTERN (cand->insn)),
780 			      def_insn, cand->insn)
781 	  || reg_set_between_p (SET_DEST (PATTERN (cand->insn)),
782 				def_insn, cand->insn))
783 	return false;
784 
785       /* We must be able to copy between the two registers.   Generate,
786 	 recognize and verify constraints of the copy.  Also fail if this
787 	 generated more than one insn.
788 
789          This generates garbage since we throw away the insn when we're
790 	 done, only to recreate it later if this test was successful.
791 
792 	 Make sure to get the mode from the extension (cand->insn).  This
793 	 is different than in the code to emit the copy as we have not
794 	 modified the defining insn yet.  */
795       start_sequence ();
796       rtx pat = PATTERN (cand->insn);
797       rtx new_dst = gen_rtx_REG (GET_MODE (SET_DEST (pat)),
798                                  REGNO (XEXP (SET_SRC (pat), 0)));
799       rtx new_src = gen_rtx_REG (GET_MODE (SET_DEST (pat)),
800                                  REGNO (SET_DEST (pat)));
801       emit_move_insn (new_dst, new_src);
802 
803       rtx insn = get_insns();
804       end_sequence ();
805       if (NEXT_INSN (insn))
806 	return false;
807       if (recog_memoized (insn) == -1)
808 	return false;
809       extract_insn (insn);
810       if (!constrain_operands (1))
811 	return false;
812     }
813 
814 
815   /* If cand->insn has been already modified, update cand->mode to a wider
816      mode if possible, or punt.  */
817   if (state->modified[INSN_UID (cand->insn)].kind != EXT_MODIFIED_NONE)
818     {
819       enum machine_mode mode;
820       rtx set;
821 
822       if (state->modified[INSN_UID (cand->insn)].kind
823 	  != (cand->code == ZERO_EXTEND
824 	      ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT)
825 	  || state->modified[INSN_UID (cand->insn)].mode != cand->mode
826 	  || (set = single_set (cand->insn)) == NULL_RTX)
827 	return false;
828       mode = GET_MODE (SET_DEST (set));
829       gcc_assert (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (cand->mode));
830       cand->mode = mode;
831     }
832 
833   merge_successful = true;
834 
835   /* Go through the defs vector and try to merge all the definitions
836      in this vector.  */
837   state->modified_list.truncate (0);
838   FOR_EACH_VEC_ELT (state->defs_list, defs_ix, def_insn)
839     {
840       if (merge_def_and_ext (cand, def_insn, state))
841 	state->modified_list.safe_push (def_insn);
842       else
843         {
844           merge_successful = false;
845           break;
846         }
847     }
848 
849   /* Now go through the conditional copies vector and try to merge all
850      the copies in this vector.  */
851   if (merge_successful)
852     {
853       FOR_EACH_VEC_ELT (state->copies_list, i, def_insn)
854         {
855           if (transform_ifelse (cand, def_insn))
856 	    state->modified_list.safe_push (def_insn);
857           else
858             {
859               merge_successful = false;
860               break;
861             }
862         }
863     }
864 
865   if (merge_successful)
866     {
867       /* Commit the changes here if possible
868 	 FIXME: It's an all-or-nothing scenario.  Even if only one definition
869 	 cannot be merged, we entirely give up.  In the future, we should allow
870 	 extensions to be partially eliminated along those paths where the
871 	 definitions could be merged.  */
872       if (apply_change_group ())
873         {
874           if (dump_file)
875             fprintf (dump_file, "All merges were successful.\n");
876 
877 	  FOR_EACH_VEC_ELT (state->modified_list, i, def_insn)
878 	    {
879 	      ext_modified *modified = &state->modified[INSN_UID (def_insn)];
880 	      if (modified->kind == EXT_MODIFIED_NONE)
881 		modified->kind = (cand->code == ZERO_EXTEND ? EXT_MODIFIED_ZEXT
882 						            : EXT_MODIFIED_SEXT);
883 
884 	      if (copy_needed)
885 		modified->do_not_reextend = 1;
886 	    }
887           return true;
888         }
889       else
890         {
891           /* Changes need not be cancelled explicitly as apply_change_group
892              does it.  Print list of definitions in the dump_file for debug
893              purposes.  This extension cannot be deleted.  */
894           if (dump_file)
895             {
896 	      fprintf (dump_file,
897 		       "Merge cancelled, non-mergeable definitions:\n");
898 	      FOR_EACH_VEC_ELT (state->modified_list, i, def_insn)
899 	        print_rtl_single (dump_file, def_insn);
900             }
901         }
902     }
903   else
904     {
905       /* Cancel any changes that have been made so far.  */
906       cancel_changes (0);
907     }
908 
909   return false;
910 }
911 
912 /* Add an extension pattern that could be eliminated.  */
913 
914 static void
add_removable_extension(const_rtx expr,rtx insn,vec<ext_cand> * insn_list,unsigned * def_map)915 add_removable_extension (const_rtx expr, rtx insn,
916 			 vec<ext_cand> *insn_list,
917 			 unsigned *def_map)
918 {
919   enum rtx_code code;
920   enum machine_mode mode;
921   unsigned int idx;
922   rtx src, dest;
923 
924   /* We are looking for SET (REG N) (ANY_EXTEND (REG N)).  */
925   if (GET_CODE (expr) != SET)
926     return;
927 
928   src = SET_SRC (expr);
929   code = GET_CODE (src);
930   dest = SET_DEST (expr);
931   mode = GET_MODE (dest);
932 
933   if (REG_P (dest)
934       && (code == SIGN_EXTEND || code == ZERO_EXTEND)
935       && REG_P (XEXP (src, 0)))
936     {
937       struct df_link *defs, *def;
938       ext_cand *cand;
939 
940       /* First, make sure we can get all the reaching definitions.  */
941       defs = get_defs (insn, XEXP (src, 0), NULL);
942       if (!defs)
943 	{
944 	  if (dump_file)
945 	    {
946 	      fprintf (dump_file, "Cannot eliminate extension:\n");
947 	      print_rtl_single (dump_file, insn);
948 	      fprintf (dump_file, " because of missing definition(s)\n");
949 	    }
950 	  return;
951 	}
952 
953       /* Second, make sure the reaching definitions don't feed another and
954 	 different extension.  FIXME: this obviously can be improved.  */
955       for (def = defs; def; def = def->next)
956 	if ((idx = def_map[INSN_UID (DF_REF_INSN (def->ref))])
957 	    && (cand = &(*insn_list)[idx - 1])
958 	    && cand->code != code)
959 	  {
960 	    if (dump_file)
961 	      {
962 	        fprintf (dump_file, "Cannot eliminate extension:\n");
963 		print_rtl_single (dump_file, insn);
964 	        fprintf (dump_file, " because of other extension\n");
965 	      }
966 	    return;
967 	  }
968 
969       /* Then add the candidate to the list and insert the reaching definitions
970          into the definition map.  */
971       ext_cand e = {expr, code, mode, insn};
972       insn_list->safe_push (e);
973       idx = insn_list->length ();
974 
975       for (def = defs; def; def = def->next)
976 	def_map[INSN_UID (DF_REF_INSN (def->ref))] = idx;
977     }
978 }
979 
980 /* Traverse the instruction stream looking for extensions and return the
981    list of candidates.  */
982 
983 static vec<ext_cand>
find_removable_extensions(void)984 find_removable_extensions (void)
985 {
986   vec<ext_cand> insn_list = vNULL;
987   basic_block bb;
988   rtx insn, set;
989   unsigned *def_map = XCNEWVEC (unsigned, max_insn_uid);
990 
991   FOR_EACH_BB_FN (bb, cfun)
992     FOR_BB_INSNS (bb, insn)
993       {
994 	if (!NONDEBUG_INSN_P (insn))
995 	  continue;
996 
997 	set = single_set (insn);
998 	if (set == NULL_RTX)
999 	  continue;
1000 	add_removable_extension (set, insn, &insn_list, def_map);
1001       }
1002 
1003   XDELETEVEC (def_map);
1004 
1005   return insn_list;
1006 }
1007 
1008 /* This is the main function that checks the insn stream for redundant
1009    extensions and tries to remove them if possible.  */
1010 
1011 static void
find_and_remove_re(void)1012 find_and_remove_re (void)
1013 {
1014   ext_cand *curr_cand;
1015   rtx curr_insn = NULL_RTX;
1016   int num_re_opportunities = 0, num_realized = 0, i;
1017   vec<ext_cand> reinsn_list;
1018   auto_vec<rtx> reinsn_del_list;
1019   auto_vec<rtx> reinsn_copy_list;
1020   ext_state state;
1021 
1022   /* Construct DU chain to get all reaching definitions of each
1023      extension instruction.  */
1024   df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
1025   df_chain_add_problem (DF_UD_CHAIN + DF_DU_CHAIN);
1026   df_analyze ();
1027   df_set_flags (DF_DEFER_INSN_RESCAN);
1028 
1029   max_insn_uid = get_max_uid ();
1030   reinsn_list = find_removable_extensions ();
1031   state.defs_list.create (0);
1032   state.copies_list.create (0);
1033   state.modified_list.create (0);
1034   state.work_list.create (0);
1035   if (reinsn_list.is_empty ())
1036     state.modified = NULL;
1037   else
1038     state.modified = XCNEWVEC (struct ext_modified, max_insn_uid);
1039 
1040   FOR_EACH_VEC_ELT (reinsn_list, i, curr_cand)
1041     {
1042       num_re_opportunities++;
1043 
1044       /* Try to combine the extension with the definition.  */
1045       if (dump_file)
1046         {
1047           fprintf (dump_file, "Trying to eliminate extension:\n");
1048           print_rtl_single (dump_file, curr_cand->insn);
1049         }
1050 
1051       if (combine_reaching_defs (curr_cand, curr_cand->expr, &state))
1052         {
1053           if (dump_file)
1054             fprintf (dump_file, "Eliminated the extension.\n");
1055           num_realized++;
1056 	  /* If the RHS of the current candidate is not (extend (reg)), then
1057 	     we do not allow the optimization of extensions where
1058 	     the source and destination registers do not match.  Thus
1059 	     checking REG_P here is correct.  */
1060 	  if (REG_P (XEXP (SET_SRC (PATTERN (curr_cand->insn)), 0))
1061 	      && (REGNO (SET_DEST (PATTERN (curr_cand->insn)))
1062 		  != REGNO (XEXP (SET_SRC (PATTERN (curr_cand->insn)), 0))))
1063 	    {
1064               reinsn_copy_list.safe_push (curr_cand->insn);
1065               reinsn_copy_list.safe_push (state.defs_list[0]);
1066 	    }
1067 	  reinsn_del_list.safe_push (curr_cand->insn);
1068 	  state.modified[INSN_UID (curr_cand->insn)].deleted = 1;
1069         }
1070     }
1071 
1072   /* The copy list contains pairs of insns which describe copies we
1073      need to insert into the INSN stream.
1074 
1075      The first insn in each pair is the extension insn, from which
1076      we derive the source and destination of the copy.
1077 
1078      The second insn in each pair is the memory reference where the
1079      extension will ultimately happen.  We emit the new copy
1080      immediately after this insn.
1081 
1082      It may first appear that the arguments for the copy are reversed.
1083      Remember that the memory reference will be changed to refer to the
1084      destination of the extention.  So we're actually emitting a copy
1085      from the new destination to the old destination.  */
1086   for (unsigned int i = 0; i < reinsn_copy_list.length (); i += 2)
1087     {
1088       rtx curr_insn = reinsn_copy_list[i];
1089       rtx def_insn = reinsn_copy_list[i + 1];
1090 
1091       /* Use the mode of the destination of the defining insn
1092 	 for the mode of the copy.  This is necessary if the
1093 	 defining insn was used to eliminate a second extension
1094 	 that was wider than the first.  */
1095       rtx sub_rtx = *get_sub_rtx (def_insn);
1096       rtx pat = PATTERN (curr_insn);
1097       rtx new_dst = gen_rtx_REG (GET_MODE (SET_DEST (sub_rtx)),
1098 				 REGNO (XEXP (SET_SRC (pat), 0)));
1099       rtx new_src = gen_rtx_REG (GET_MODE (SET_DEST (sub_rtx)),
1100 				 REGNO (SET_DEST (pat)));
1101       rtx set = gen_rtx_SET (VOIDmode, new_dst, new_src);
1102       emit_insn_after (set, def_insn);
1103     }
1104 
1105   /* Delete all useless extensions here in one sweep.  */
1106   FOR_EACH_VEC_ELT (reinsn_del_list, i, curr_insn)
1107     delete_insn (curr_insn);
1108 
1109   reinsn_list.release ();
1110   state.defs_list.release ();
1111   state.copies_list.release ();
1112   state.modified_list.release ();
1113   state.work_list.release ();
1114   XDELETEVEC (state.modified);
1115 
1116   if (dump_file && num_re_opportunities > 0)
1117     fprintf (dump_file, "Elimination opportunities = %d realized = %d\n",
1118 	     num_re_opportunities, num_realized);
1119 }
1120 
1121 /* Find and remove redundant extensions.  */
1122 
1123 static unsigned int
rest_of_handle_ree(void)1124 rest_of_handle_ree (void)
1125 {
1126   timevar_push (TV_REE);
1127   find_and_remove_re ();
1128   timevar_pop (TV_REE);
1129   return 0;
1130 }
1131 
1132 /* Run REE pass when flag_ree is set at optimization level > 0.  */
1133 
1134 static bool
gate_handle_ree(void)1135 gate_handle_ree (void)
1136 {
1137   return (optimize > 0 && flag_ree);
1138 }
1139 
1140 namespace {
1141 
1142 const pass_data pass_data_ree =
1143 {
1144   RTL_PASS, /* type */
1145   "ree", /* name */
1146   OPTGROUP_NONE, /* optinfo_flags */
1147   true, /* has_gate */
1148   true, /* has_execute */
1149   TV_REE, /* tv_id */
1150   0, /* properties_required */
1151   0, /* properties_provided */
1152   0, /* properties_destroyed */
1153   0, /* todo_flags_start */
1154   ( TODO_df_finish | TODO_verify_rtl_sharing ), /* todo_flags_finish */
1155 };
1156 
1157 class pass_ree : public rtl_opt_pass
1158 {
1159 public:
pass_ree(gcc::context * ctxt)1160   pass_ree (gcc::context *ctxt)
1161     : rtl_opt_pass (pass_data_ree, ctxt)
1162   {}
1163 
1164   /* opt_pass methods: */
gate()1165   bool gate () { return gate_handle_ree (); }
execute()1166   unsigned int execute () { return rest_of_handle_ree (); }
1167 
1168 }; // class pass_ree
1169 
1170 } // anon namespace
1171 
1172 rtl_opt_pass *
make_pass_ree(gcc::context * ctxt)1173 make_pass_ree (gcc::context *ctxt)
1174 {
1175   return new pass_ree (ctxt);
1176 }
1177