1 /* Decompose multiword subregs.
2    Copyright (C) 2007-2014 Free Software Foundation, Inc.
3    Contributed by Richard Henderson <rth@redhat.com>
4 		  Ian Lance Taylor <iant@google.com>
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12 
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "machmode.h"
26 #include "tm.h"
27 #include "tree.h"
28 #include "rtl.h"
29 #include "tm_p.h"
30 #include "flags.h"
31 #include "insn-config.h"
32 #include "obstack.h"
33 #include "basic-block.h"
34 #include "recog.h"
35 #include "bitmap.h"
36 #include "dce.h"
37 #include "expr.h"
38 #include "except.h"
39 #include "regs.h"
40 #include "tree-pass.h"
41 #include "df.h"
42 #include "lower-subreg.h"
43 
44 #ifdef STACK_GROWS_DOWNWARD
45 # undef STACK_GROWS_DOWNWARD
46 # define STACK_GROWS_DOWNWARD 1
47 #else
48 # define STACK_GROWS_DOWNWARD 0
49 #endif
50 
51 
52 /* Decompose multi-word pseudo-registers into individual
53    pseudo-registers when possible and profitable.  This is possible
54    when all the uses of a multi-word register are via SUBREG, or are
55    copies of the register to another location.  Breaking apart the
56    register permits more CSE and permits better register allocation.
57    This is profitable if the machine does not have move instructions
58    to do this.
59 
60    This pass only splits moves with modes that are wider than
61    word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
62    integer modes that are twice the width of word_mode.  The latter
63    could be generalized if there was a need to do this, but the trend in
64    architectures is to not need this.
65 
66    There are two useful preprocessor defines for use by maintainers:
67 
68    #define LOG_COSTS 1
69 
70    if you wish to see the actual cost estimates that are being used
71    for each mode wider than word mode and the cost estimates for zero
72    extension and the shifts.   This can be useful when port maintainers
73    are tuning insn rtx costs.
74 
75    #define FORCE_LOWERING 1
76 
77    if you wish to test the pass with all the transformation forced on.
78    This can be useful for finding bugs in the transformations.  */
79 
80 #define LOG_COSTS 0
81 #define FORCE_LOWERING 0
82 
83 /* Bit N in this bitmap is set if regno N is used in a context in
84    which we can decompose it.  */
85 static bitmap decomposable_context;
86 
87 /* Bit N in this bitmap is set if regno N is used in a context in
88    which it can not be decomposed.  */
89 static bitmap non_decomposable_context;
90 
91 /* Bit N in this bitmap is set if regno N is used in a subreg
92    which changes the mode but not the size.  This typically happens
   when the register is accessed as a floating-point value; we want to
94    avoid generating accesses to its subwords in integer modes.  */
95 static bitmap subreg_context;
96 
97 /* Bit N in the bitmap in element M of this array is set if there is a
98    copy from reg M to reg N.  */
99 static vec<bitmap> reg_copy_graph;
100 
101 struct target_lower_subreg default_target_lower_subreg;
102 #if SWITCHABLE_TARGET
103 struct target_lower_subreg *this_target_lower_subreg
104   = &default_target_lower_subreg;
105 #endif
106 
107 #define twice_word_mode \
108   this_target_lower_subreg->x_twice_word_mode
109 #define choices \
110   this_target_lower_subreg->x_choices
111 
112 /* RTXes used while computing costs.  */
113 struct cost_rtxes {
114   /* Source and target registers.  */
115   rtx source;
116   rtx target;
117 
118   /* A twice_word_mode ZERO_EXTEND of SOURCE.  */
119   rtx zext;
120 
121   /* A shift of SOURCE.  */
122   rtx shift;
123 
124   /* A SET of TARGET.  */
125   rtx set;
126 };
127 
128 /* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
129    rtxes in RTXES.  SPEED_P selects between the speed and size cost.  */
130 
131 static int
shift_cost(bool speed_p,struct cost_rtxes * rtxes,enum rtx_code code,enum machine_mode mode,int op1)132 shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
133 	    enum machine_mode mode, int op1)
134 {
135   PUT_CODE (rtxes->shift, code);
136   PUT_MODE (rtxes->shift, mode);
137   PUT_MODE (rtxes->source, mode);
138   XEXP (rtxes->shift, 1) = GEN_INT (op1);
139   return set_src_cost (rtxes->shift, speed_p);
140 }
141 
142 /* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
143    to true if it is profitable to split a double-word CODE shift
144    of X + BITS_PER_WORD bits.  SPEED_P says whether we are testing
145    for speed or size profitability.
146 
147    Use the rtxes in RTXES to calculate costs.  WORD_MOVE_ZERO_COST is
148    the cost of moving zero into a word-mode register.  WORD_MOVE_COST
149    is the cost of moving between word registers.  */
150 
151 static void
compute_splitting_shift(bool speed_p,struct cost_rtxes * rtxes,bool * splitting,enum rtx_code code,int word_move_zero_cost,int word_move_cost)152 compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
153 			 bool *splitting, enum rtx_code code,
154 			 int word_move_zero_cost, int word_move_cost)
155 {
156   int wide_cost, narrow_cost, upper_cost, i;
157 
158   for (i = 0; i < BITS_PER_WORD; i++)
159     {
160       wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
161 			      i + BITS_PER_WORD);
162       if (i == 0)
163 	narrow_cost = word_move_cost;
164       else
165 	narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
166 
167       if (code != ASHIFTRT)
168 	upper_cost = word_move_zero_cost;
169       else if (i == BITS_PER_WORD - 1)
170 	upper_cost = word_move_cost;
171       else
172 	upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
173 				 BITS_PER_WORD - 1);
174 
175       if (LOG_COSTS)
176 	fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
177 		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
178 		 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);
179 
180       if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
181 	splitting[i] = true;
182     }
183 }
184 
185 /* Compute what we should do when optimizing for speed or size; SPEED_P
186    selects which.  Use RTXES for computing costs.  */
187 
static void
compute_costs (bool speed_p, struct cost_rtxes *rtxes)
{
  unsigned int i;
  int word_move_zero_cost, word_move_cost;

  /* Cost of loading constant zero into a word-mode register.  */
  PUT_MODE (rtxes->target, word_mode);
  SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
  word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);

  /* Cost of a word-mode register-to-register move.  */
  SET_SRC (rtxes->set) = rtxes->source;
  word_move_cost = set_rtx_cost (rtxes->set, speed_p);

  if (LOG_COSTS)
    fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
	     GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);

  /* For every mode wider than a word, record whether a move in that
     mode is no cheaper than FACTOR separate word-mode moves.  */
  for (i = 0; i < MAX_MACHINE_MODE; i++)
    {
      enum machine_mode mode = (enum machine_mode) i;
      int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      if (factor > 1)
	{
	  int mode_move_cost;

	  PUT_MODE (rtxes->target, mode);
	  PUT_MODE (rtxes->source, mode);
	  mode_move_cost = set_rtx_cost (rtxes->set, speed_p);

	  if (LOG_COSTS)
	    fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
		     GET_MODE_NAME (mode), mode_move_cost,
		     word_move_cost, factor);

	  if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
	    {
	      choices[speed_p].move_modes_to_split[i] = true;
	      choices[speed_p].something_to_do = true;
	    }
	}
    }

  /* For the moves and shifts, the only case that is checked is one
     where the mode of the target is an integer mode twice the width
     of the word_mode.

     If it is not profitable to split a double word move then do not
     even consider the shifts or the zero extension.  */
  if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
    {
      int zext_cost;

      /* The only case here is to check whether moving the upper part
	 with a zero is cheaper than doing the zext itself.  */
      PUT_MODE (rtxes->source, word_mode);
      zext_cost = set_src_cost (rtxes->zext, speed_p);

      if (LOG_COSTS)
	fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
		 zext_cost, word_move_cost, word_move_zero_cost);

      if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
	choices[speed_p].splitting_zext = true;

      /* Fill in the per-shift-count profitability tables for each
	 double-word shift variety.  */
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashift, ASHIFT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_lshiftrt, LSHIFTRT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashiftrt, ASHIFTRT,
			       word_move_zero_cost, word_move_cost);
    }
}
264 
265 /* Do one-per-target initialisation.  This involves determining
266    which operations on the machine are profitable.  If none are found,
267    then the pass just returns when called.  */
268 
269 void
init_lower_subreg(void)270 init_lower_subreg (void)
271 {
272   struct cost_rtxes rtxes;
273 
274   memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));
275 
276   twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode);
277 
278   rtxes.target = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER);
279   rtxes.source = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER + 1);
280   rtxes.set = gen_rtx_SET (VOIDmode, rtxes.target, rtxes.source);
281   rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
282   rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);
283 
284   if (LOG_COSTS)
285     fprintf (stderr, "\nSize costs\n==========\n\n");
286   compute_costs (false, &rtxes);
287 
288   if (LOG_COSTS)
289     fprintf (stderr, "\nSpeed costs\n===========\n\n");
290   compute_costs (true, &rtxes);
291 }
292 
293 static bool
simple_move_operand(rtx x)294 simple_move_operand (rtx x)
295 {
296   if (GET_CODE (x) == SUBREG)
297     x = SUBREG_REG (x);
298 
299   if (!OBJECT_P (x))
300     return false;
301 
302   if (GET_CODE (x) == LABEL_REF
303       || GET_CODE (x) == SYMBOL_REF
304       || GET_CODE (x) == HIGH
305       || GET_CODE (x) == CONST)
306     return false;
307 
308   if (MEM_P (x)
309       && (MEM_VOLATILE_P (x)
310 	  || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
311     return false;
312 
313   return true;
314 }
315 
316 /* If INSN is a single set between two objects that we want to split,
317    return the single set.  SPEED_P says whether we are optimizing
318    INSN for speed or size.
319 
320    INSN should have been passed to recog and extract_insn before this
321    is called.  */
322 
323 static rtx
simple_move(rtx insn,bool speed_p)324 simple_move (rtx insn, bool speed_p)
325 {
326   rtx x;
327   rtx set;
328   enum machine_mode mode;
329 
330   if (recog_data.n_operands != 2)
331     return NULL_RTX;
332 
333   set = single_set (insn);
334   if (!set)
335     return NULL_RTX;
336 
337   x = SET_DEST (set);
338   if (x != recog_data.operand[0] && x != recog_data.operand[1])
339     return NULL_RTX;
340   if (!simple_move_operand (x))
341     return NULL_RTX;
342 
343   x = SET_SRC (set);
344   if (x != recog_data.operand[0] && x != recog_data.operand[1])
345     return NULL_RTX;
346   /* For the src we can handle ASM_OPERANDS, and it is beneficial for
347      things like x86 rdtsc which returns a DImode value.  */
348   if (GET_CODE (x) != ASM_OPERANDS
349       && !simple_move_operand (x))
350     return NULL_RTX;
351 
352   /* We try to decompose in integer modes, to avoid generating
353      inefficient code copying between integer and floating point
354      registers.  That means that we can't decompose if this is a
355      non-integer mode for which there is no integer mode of the same
356      size.  */
357   mode = GET_MODE (SET_DEST (set));
358   if (!SCALAR_INT_MODE_P (mode)
359       && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0)
360 	  == BLKmode))
361     return NULL_RTX;
362 
363   /* Reject PARTIAL_INT modes.  They are used for processor specific
364      purposes and it's probably best not to tamper with them.  */
365   if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
366     return NULL_RTX;
367 
368   if (!choices[speed_p].move_modes_to_split[(int) mode])
369     return NULL_RTX;
370 
371   return set;
372 }
373 
374 /* If SET is a copy from one multi-word pseudo-register to another,
375    record that in reg_copy_graph.  Return whether it is such a
376    copy.  */
377 
378 static bool
find_pseudo_copy(rtx set)379 find_pseudo_copy (rtx set)
380 {
381   rtx dest = SET_DEST (set);
382   rtx src = SET_SRC (set);
383   unsigned int rd, rs;
384   bitmap b;
385 
386   if (!REG_P (dest) || !REG_P (src))
387     return false;
388 
389   rd = REGNO (dest);
390   rs = REGNO (src);
391   if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
392     return false;
393 
394   b = reg_copy_graph[rs];
395   if (b == NULL)
396     {
397       b = BITMAP_ALLOC (NULL);
398       reg_copy_graph[rs] = b;
399     }
400 
401   bitmap_set_bit (b, rd);
402 
403   return true;
404 }
405 
406 /* Look through the registers in DECOMPOSABLE_CONTEXT.  For each case
407    where they are copied to another register, add the register to
408    which they are copied to DECOMPOSABLE_CONTEXT.  Use
409    NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
410    copies of registers which are in NON_DECOMPOSABLE_CONTEXT.  */
411 
static void
propagate_pseudo_copies (void)
{
  bitmap queue, propagate;

  queue = BITMAP_ALLOC (NULL);
  propagate = BITMAP_ALLOC (NULL);

  /* Fixed-point iteration: start with everything currently
     decomposable, and repeatedly add copy targets until no new
     registers appear.  */
  bitmap_copy (queue, decomposable_context);
  do
    {
      bitmap_iterator iter;
      unsigned int i;

      bitmap_clear (propagate);

      /* PROPAGATE = union of copy targets of QUEUE members, minus
	 anything known to be non-decomposable.  */
      EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
	{
	  bitmap b = reg_copy_graph[i];
	  if (b)
	    bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
	}

      /* Next round only processes registers not already seen.  */
      bitmap_and_compl (queue, propagate, decomposable_context);
      bitmap_ior_into (decomposable_context, propagate);
    }
  while (!bitmap_empty_p (queue));

  BITMAP_FREE (queue);
  BITMAP_FREE (propagate);
}
443 
444 /* A pointer to one of these values is passed to
445    find_decomposable_subregs via for_each_rtx.  */
446 
enum classify_move_insn
{
  /* Not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* A simple move we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* Any other simple move (one we do not intend to decompose).  */
  SIMPLE_MOVE
};
456 
457 /* This is called via for_each_rtx.  If we find a SUBREG which we
458    could use to decompose a pseudo-register, set a bit in
459    DECOMPOSABLE_CONTEXT.  If we find an unadorned register which is
460    not a simple pseudo-register copy, DATA will point at the type of
461    move, and we set a bit in DECOMPOSABLE_CONTEXT or
462    NON_DECOMPOSABLE_CONTEXT as appropriate.  */
463 
/* Returns 0 to continue the for_each_rtx walk, -1 to skip the
   sub-rtxes of *PX, and does its real work by setting bits in the
   context bitmaps.  */

static int
find_decomposable_subregs (rtx *px, void *data)
{
  enum classify_move_insn *pcmi = (enum classify_move_insn *) data;
  rtx x = *px;

  if (x == NULL_RTX)
    return 0;

  if (GET_CODE (x) == SUBREG)
    {
      rtx inner = SUBREG_REG (x);
      unsigned int regno, outer_size, inner_size, outer_words, inner_words;

      if (!REG_P (inner))
	return 0;

      regno = REGNO (inner);
      if (HARD_REGISTER_NUM_P (regno))
	return -1;

      outer_size = GET_MODE_SIZE (GET_MODE (x));
      inner_size = GET_MODE_SIZE (GET_MODE (inner));
      /* Round sizes up to whole words.  */
      outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
      inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

      /* We only try to decompose single word subregs of multi-word
	 registers.  When we find one, we return -1 to avoid iterating
	 over the inner register.

	 ??? This doesn't allow, e.g., DImode subregs of TImode values
	 on 32-bit targets.  We would need to record the way the
	 pseudo-register was used, and only decompose if all the uses
	 were the same number and size of pieces.  Hopefully this
	 doesn't happen much.  */

      if (outer_words == 1 && inner_words > 1)
	{
	  bitmap_set_bit (decomposable_context, regno);
	  return -1;
	}

      /* If this is a cast from one mode to another, where the modes
	 have the same size, and they are not tieable, then mark this
	 register as non-decomposable.  If we decompose it we are
	 likely to mess up whatever the backend is trying to do.  */
      if (outer_words > 1
	  && outer_size == inner_size
	  && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner)))
	{
	  bitmap_set_bit (non_decomposable_context, regno);
	  bitmap_set_bit (subreg_context, regno);
	  return -1;
	}
    }
  else if (REG_P (x))
    {
      unsigned int regno;

      /* We will see an outer SUBREG before we see the inner REG, so
	 when we see a plain REG here it means a direct reference to
	 the register.

	 If this is not a simple copy from one location to another,
	 then we can not decompose this register.  If this is a simple
	 copy we want to decompose, and the mode is right,
	 then we mark the register as decomposable.
	 Otherwise we don't say anything about this register --
	 it could be decomposed, but whether that would be
	 profitable depends upon how it is used elsewhere.

	 We only set bits in the bitmap for multi-word
	 pseudo-registers, since those are the only ones we care about
	 and it keeps the size of the bitmaps down.  */

      regno = REGNO (x);
      if (!HARD_REGISTER_NUM_P (regno)
	  && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
	{
	  switch (*pcmi)
	    {
	    case NOT_SIMPLE_MOVE:
	      bitmap_set_bit (non_decomposable_context, regno);
	      break;
	    case DECOMPOSABLE_SIMPLE_MOVE:
	      if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
		bitmap_set_bit (decomposable_context, regno);
	      break;
	    case SIMPLE_MOVE:
	      break;
	    default:
	      gcc_unreachable ();
	    }
	}
    }
  else if (MEM_P (x))
    {
      enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;

      /* Any registers used in a MEM do not participate in a
	 SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE.  Do our own recursion
	 here, and return -1 to block the parent's recursion.  */
      for_each_rtx (&XEXP (x, 0), find_decomposable_subregs, &cmi_mem);
      return -1;
    }

  return 0;
}
572 
573 /* Decompose REGNO into word-sized components.  We smash the REG node
574    in place.  This ensures that (1) something goes wrong quickly if we
575    fail to make some replacement, and (2) the debug information inside
576    the symbol table is automatically kept up to date.  */
577 
578 static void
decompose_register(unsigned int regno)579 decompose_register (unsigned int regno)
580 {
581   rtx reg;
582   unsigned int words, i;
583   rtvec v;
584 
585   reg = regno_reg_rtx[regno];
586 
587   regno_reg_rtx[regno] = NULL_RTX;
588 
589   words = GET_MODE_SIZE (GET_MODE (reg));
590   words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
591 
592   v = rtvec_alloc (words);
593   for (i = 0; i < words; ++i)
594     RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
595 
596   PUT_CODE (reg, CONCATN);
597   XVEC (reg, 0) = v;
598 
599   if (dump_file)
600     {
601       fprintf (dump_file, "; Splitting reg %u ->", regno);
602       for (i = 0; i < words; ++i)
603 	fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
604       fputc ('\n', dump_file);
605     }
606 }
607 
608 /* Get a SUBREG of a CONCATN.  */
609 
/* Returns the OUTERMODE-sized piece of CONCATN OP at byte offset
   BYTE, or NULL_RTX if the requested piece straddles two elements of
   the CONCATN.  */

static rtx
simplify_subreg_concatn (enum machine_mode outermode, rtx op,
			 unsigned int byte)
{
  unsigned int inner_size;
  enum machine_mode innermode, partmode;
  rtx part;
  unsigned int final_offset;

  gcc_assert (GET_CODE (op) == CONCATN);
  gcc_assert (byte % GET_MODE_SIZE (outermode) == 0);

  innermode = GET_MODE (op);
  gcc_assert (byte < GET_MODE_SIZE (innermode));
  gcc_assert (GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (innermode));

  /* Each element of the CONCATN covers INNER_SIZE bytes; pick the one
     containing BYTE.  */
  inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
  part = XVECEXP (op, 0, byte / inner_size);
  partmode = GET_MODE (part);

  /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
     regular CONST_VECTORs.  They have vector or integer modes, depending
     on the capabilities of the target.  Cope with them.  */
  if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
    partmode = GET_MODE_INNER (innermode);
  else if (partmode == VOIDmode)
    {
      enum mode_class mclass = GET_MODE_CLASS (innermode);
      partmode = mode_for_size (inner_size * BITS_PER_UNIT, mclass, 0);
    }

  /* Fail if the requested piece does not fit within a single
     element.  */
  final_offset = byte % inner_size;
  if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
    return NULL_RTX;

  return simplify_gen_subreg (outermode, part, partmode, final_offset);
}
647 
648 /* Wrapper around simplify_gen_subreg which handles CONCATN.  */
649 
static rtx
simplify_gen_subreg_concatn (enum machine_mode outermode, rtx op,
			     enum machine_mode innermode, unsigned int byte)
{
  rtx ret;

  /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
     If OP is a SUBREG of a CONCATN, then it must be a simple mode
     change with the same size and offset 0, or it must extract a
     part.  We shouldn't see anything else here.  */
  if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
    {
      rtx op2;

      /* Same-size, offset-0 SUBREG: peel it off and recurse on the
	 CONCATN directly.  */
      if ((GET_MODE_SIZE (GET_MODE (op))
	   == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
	  && SUBREG_BYTE (op) == 0)
	return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
					    GET_MODE (SUBREG_REG (op)), byte);

      op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
				     SUBREG_BYTE (op));
      if (op2 == NULL_RTX)
	{
	  /* The SUBREG straddles CONCATN elements; try extracting the
	     final OUTERMODE piece directly at the combined offset.
	     We don't handle paradoxical subregs here.  */
	  gcc_assert (GET_MODE_SIZE (outermode)
		      <= GET_MODE_SIZE (GET_MODE (op)));
	  gcc_assert (GET_MODE_SIZE (GET_MODE (op))
		      <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))));
	  op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
					 byte + SUBREG_BYTE (op));
	  gcc_assert (op2 != NULL_RTX);
	  return op2;
	}

      op = op2;
      gcc_assert (op != NULL_RTX);
      gcc_assert (innermode == GET_MODE (op));
    }

  if (GET_CODE (op) == CONCATN)
    return simplify_subreg_concatn (outermode, op, byte);

  ret = simplify_gen_subreg (outermode, op, innermode, byte);

  /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
     resolve_simple_move will ask for the high part of the paradoxical
     subreg, which does not have a value.  Just return a zero.  */
  if (ret == NULL_RTX
      && GET_CODE (op) == SUBREG
      && SUBREG_BYTE (op) == 0
      && (GET_MODE_SIZE (innermode)
	  > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))))
    return CONST0_RTX (outermode);

  gcc_assert (ret != NULL_RTX);
  return ret;
}
708 
709 /* Return whether we should resolve X into the registers into which it
710    was decomposed.  */
711 
712 static bool
resolve_reg_p(rtx x)713 resolve_reg_p (rtx x)
714 {
715   return GET_CODE (x) == CONCATN;
716 }
717 
718 /* Return whether X is a SUBREG of a register which we need to
719    resolve.  */
720 
721 static bool
resolve_subreg_p(rtx x)722 resolve_subreg_p (rtx x)
723 {
724   if (GET_CODE (x) != SUBREG)
725     return false;
726   return resolve_reg_p (SUBREG_REG (x));
727 }
728 
729 /* This is called via for_each_rtx.  Look for SUBREGs which need to be
730    decomposed.  */
731 
/* Return value protocol (for for_each_rtx): 0 keeps walking, -1 skips
   the replaced expression's sub-rtxes, 1 tells the caller the
   containing note must be removed.  */

static int
resolve_subreg_use (rtx *px, void *data)
{
  rtx insn = (rtx) data;
  rtx x = *px;

  if (x == NULL_RTX)
    return 0;

  if (resolve_subreg_p (x))
    {
      /* Replace the SUBREG of a CONCATN with the underlying piece.  */
      x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				   SUBREG_BYTE (x));

      /* It is possible for a note to contain a reference which we can
	 decompose.  In this case, return 1 to the caller to indicate
	 that the note must be removed.  */
      if (!x)
	{
	  gcc_assert (!insn);
	  return 1;
	}

      /* Queue the change; the caller applies the change group.  */
      validate_change (insn, px, x, 1);
      return -1;
    }

  if (resolve_reg_p (x))
    {
      /* Return 1 to the caller to indicate that we found a direct
	 reference to a register which is being decomposed.  This can
	 happen inside notes, multiword shift or zero-extend
	 instructions.  */
      return 1;
    }

  return 0;
}
770 
771 /* This is called via for_each_rtx.  Look for SUBREGs which can be
772    decomposed and decomposed REGs that need copying.  */
773 
static int
adjust_decomposed_uses (rtx *px, void *data ATTRIBUTE_UNUSED)
{
  rtx x = *px;

  if (x == NULL_RTX)
    return 0;

  if (resolve_subreg_p (x))
    {
      x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				   SUBREG_BYTE (x));

      if (x)
	*px = x;
      else
	/* The piece could not be extracted; fall through with a copy
	   so the CONCATN check below sees an unshared rtx.  */
	x = copy_rtx (*px);
    }

  /* Direct references to a decomposed register are unshared by
     copying the CONCATN.  */
  if (resolve_reg_p (x))
    *px = copy_rtx (x);

  return 0;
}
798 
799 /* Resolve any decomposed registers which appear in register notes on
800    INSN.  */
801 
static void
resolve_reg_notes (rtx insn)
{
  rtx *pnote, note;

  /* REG_EQUAL/REG_EQUIV notes either get their subregs resolved or,
     if they reference a decomposed register directly, are dropped.  */
  note = find_reg_equal_equiv_note (insn);
  if (note)
    {
      int old_count = num_validated_changes ();
      if (for_each_rtx (&XEXP (note, 0), resolve_subreg_use, NULL))
	remove_note (insn, note);
      else
	if (old_count != num_validated_changes ())
	  df_notes_rescan (insn);
    }

  /* Delete REG_DEAD/REG_UNUSED notes that mention a decomposed
     register; the per-word pseudos get their own notes later.  */
  pnote = &REG_NOTES (insn);
  while (*pnote != NULL_RTX)
    {
      bool del = false;

      note = *pnote;
      switch (REG_NOTE_KIND (note))
	{
	case REG_DEAD:
	case REG_UNUSED:
	  if (resolve_reg_p (XEXP (note, 0)))
	    del = true;
	  break;

	default:
	  break;
	}

      /* Unlink deleted notes in place; otherwise advance.  */
      if (del)
	*pnote = XEXP (note, 1);
      else
	pnote = &XEXP (note, 1);
    }
}
842 
843 /* Return whether X can be decomposed into subwords.  */
844 
845 static bool
can_decompose_p(rtx x)846 can_decompose_p (rtx x)
847 {
848   if (REG_P (x))
849     {
850       unsigned int regno = REGNO (x);
851 
852       if (HARD_REGISTER_NUM_P (regno))
853 	{
854 	  unsigned int byte, num_bytes;
855 
856 	  num_bytes = GET_MODE_SIZE (GET_MODE (x));
857 	  for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
858 	    if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
859 	      return false;
860 	  return true;
861 	}
862       else
863 	return !bitmap_bit_p (subreg_context, regno);
864     }
865 
866   return true;
867 }
868 
869 /* Decompose the registers used in a simple move SET within INSN.  If
870    we don't change anything, return INSN, otherwise return the start
871    of the sequence of moves.  */
872 
873 static rtx
resolve_simple_move(rtx set,rtx insn)874 resolve_simple_move (rtx set, rtx insn)
875 {
876   rtx src, dest, real_dest, insns;
877   enum machine_mode orig_mode, dest_mode;
878   unsigned int words;
879   bool pushing;
880 
881   src = SET_SRC (set);
882   dest = SET_DEST (set);
883   orig_mode = GET_MODE (dest);
884 
885   words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
886   gcc_assert (words > 1);
887 
888   start_sequence ();
889 
890   /* We have to handle copying from a SUBREG of a decomposed reg where
891      the SUBREG is larger than word size.  Rather than assume that we
892      can take a word_mode SUBREG of the destination, we copy to a new
893      register and then copy that to the destination.  */
894 
895   real_dest = NULL_RTX;
896 
897   if (GET_CODE (src) == SUBREG
898       && resolve_reg_p (SUBREG_REG (src))
899       && (SUBREG_BYTE (src) != 0
900 	  || (GET_MODE_SIZE (orig_mode)
901 	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
902     {
903       real_dest = dest;
904       dest = gen_reg_rtx (orig_mode);
905       if (REG_P (real_dest))
906 	REG_ATTRS (dest) = REG_ATTRS (real_dest);
907     }
908 
909   /* Similarly if we are copying to a SUBREG of a decomposed reg where
910      the SUBREG is larger than word size.  */
911 
912   if (GET_CODE (dest) == SUBREG
913       && resolve_reg_p (SUBREG_REG (dest))
914       && (SUBREG_BYTE (dest) != 0
915 	  || (GET_MODE_SIZE (orig_mode)
916 	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
917     {
918       rtx reg, minsn, smove;
919 
920       reg = gen_reg_rtx (orig_mode);
921       minsn = emit_move_insn (reg, src);
922       smove = single_set (minsn);
923       gcc_assert (smove != NULL_RTX);
924       resolve_simple_move (smove, minsn);
925       src = reg;
926     }
927 
928   /* If we didn't have any big SUBREGS of decomposed registers, and
929      neither side of the move is a register we are decomposing, then
930      we don't have to do anything here.  */
931 
932   if (src == SET_SRC (set)
933       && dest == SET_DEST (set)
934       && !resolve_reg_p (src)
935       && !resolve_subreg_p (src)
936       && !resolve_reg_p (dest)
937       && !resolve_subreg_p (dest))
938     {
939       end_sequence ();
940       return insn;
941     }
942 
943   /* It's possible for the code to use a subreg of a decomposed
944      register while forming an address.  We need to handle that before
945      passing the address to emit_move_insn.  We pass NULL_RTX as the
946      insn parameter to resolve_subreg_use because we can not validate
947      the insn yet.  */
948   if (MEM_P (src) || MEM_P (dest))
949     {
950       int acg;
951 
952       if (MEM_P (src))
953 	for_each_rtx (&XEXP (src, 0), resolve_subreg_use, NULL_RTX);
954       if (MEM_P (dest))
955 	for_each_rtx (&XEXP (dest, 0), resolve_subreg_use, NULL_RTX);
956       acg = apply_change_group ();
957       gcc_assert (acg);
958     }
959 
960   /* If SRC is a register which we can't decompose, or has side
961      effects, we need to move via a temporary register.  */
962 
963   if (!can_decompose_p (src)
964       || side_effects_p (src)
965       || GET_CODE (src) == ASM_OPERANDS)
966     {
967       rtx reg;
968 
969       reg = gen_reg_rtx (orig_mode);
970 
971 #ifdef AUTO_INC_DEC
972       {
973 	rtx move = emit_move_insn (reg, src);
974 	if (MEM_P (src))
975 	  {
976 	    rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
977 	    if (note)
978 	      add_reg_note (move, REG_INC, XEXP (note, 0));
979 	  }
980       }
981 #else
982       emit_move_insn (reg, src);
983 #endif
984       src = reg;
985     }
986 
987   /* If DEST is a register which we can't decompose, or has side
988      effects, we need to first move to a temporary register.  We
989      handle the common case of pushing an operand directly.  We also
990      go through a temporary register if it holds a floating point
991      value.  This gives us better code on systems which can't move
992      data easily between integer and floating point registers.  */
993 
994   dest_mode = orig_mode;
995   pushing = push_operand (dest, dest_mode);
996   if (!can_decompose_p (dest)
997       || (side_effects_p (dest) && !pushing)
998       || (!SCALAR_INT_MODE_P (dest_mode)
999 	  && !resolve_reg_p (dest)
1000 	  && !resolve_subreg_p (dest)))
1001     {
1002       if (real_dest == NULL_RTX)
1003 	real_dest = dest;
1004       if (!SCALAR_INT_MODE_P (dest_mode))
1005 	{
1006 	  dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT,
1007 				     MODE_INT, 0);
1008 	  gcc_assert (dest_mode != BLKmode);
1009 	}
1010       dest = gen_reg_rtx (dest_mode);
1011       if (REG_P (real_dest))
1012 	REG_ATTRS (dest) = REG_ATTRS (real_dest);
1013     }
1014 
1015   if (pushing)
1016     {
1017       unsigned int i, j, jinc;
1018 
1019       gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
1020       gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
1021       gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);
1022 
1023       if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
1024 	{
1025 	  j = 0;
1026 	  jinc = 1;
1027 	}
1028       else
1029 	{
1030 	  j = words - 1;
1031 	  jinc = -1;
1032 	}
1033 
1034       for (i = 0; i < words; ++i, j += jinc)
1035 	{
1036 	  rtx temp;
1037 
1038 	  temp = copy_rtx (XEXP (dest, 0));
1039 	  temp = adjust_automodify_address_nv (dest, word_mode, temp,
1040 					       j * UNITS_PER_WORD);
1041 	  emit_move_insn (temp,
1042 			  simplify_gen_subreg_concatn (word_mode, src,
1043 						       orig_mode,
1044 						       j * UNITS_PER_WORD));
1045 	}
1046     }
1047   else
1048     {
1049       unsigned int i;
1050 
1051       if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
1052 	emit_clobber (dest);
1053 
1054       for (i = 0; i < words; ++i)
1055 	emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
1056 						     dest_mode,
1057 						     i * UNITS_PER_WORD),
1058 			simplify_gen_subreg_concatn (word_mode, src,
1059 						     orig_mode,
1060 						     i * UNITS_PER_WORD));
1061     }
1062 
1063   if (real_dest != NULL_RTX)
1064     {
1065       rtx mdest, minsn, smove;
1066 
1067       if (dest_mode == orig_mode)
1068 	mdest = dest;
1069       else
1070 	mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
1071       minsn = emit_move_insn (real_dest, mdest);
1072 
1073 #ifdef AUTO_INC_DEC
1074   if (MEM_P (real_dest)
1075       && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
1076     {
1077       rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
1078       if (note)
1079 	add_reg_note (minsn, REG_INC, XEXP (note, 0));
1080     }
1081 #endif
1082 
1083       smove = single_set (minsn);
1084       gcc_assert (smove != NULL_RTX);
1085 
1086       resolve_simple_move (smove, minsn);
1087     }
1088 
1089   insns = get_insns ();
1090   end_sequence ();
1091 
1092   copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);
1093 
1094   emit_insn_before (insns, insn);
1095 
1096   /* If we get here via self-recursion, then INSN is not yet in the insns
1097      chain and delete_insn will fail.  We only want to remove INSN from the
1098      current sequence.  See PR56738.  */
1099   if (in_sequence_p ())
1100     remove_insn (insn);
1101   else
1102     delete_insn (insn);
1103 
1104   return insns;
1105 }
1106 
/* Change a CLOBBER of a decomposed register into a CLOBBER of the
   component registers.  Return whether we changed something.  */

static bool
resolve_clobber (rtx pat, rtx insn)
{
  rtx reg;
  enum machine_mode orig_mode;
  unsigned int words, i;
  int ret;

  reg = XEXP (pat, 0);
  /* Nothing to do unless the clobbered register was chosen for
     decomposition (either directly or via a subreg of it).  */
  if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
    return false;

  orig_mode = GET_MODE (reg);
  /* Number of word-mode component registers, rounding up.  */
  words = GET_MODE_SIZE (orig_mode);
  words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Rewrite INSN in place to clobber the first component word.  */
  ret = validate_change (NULL_RTX, &XEXP (pat, 0),
			 simplify_gen_subreg_concatn (word_mode, reg,
						      orig_mode, 0),
			 0);
  df_insn_rescan (insn);
  gcc_assert (ret != 0);

  /* Emit a separate CLOBBER after INSN for each remaining component
     word.  */
  for (i = words - 1; i > 0; --i)
    {
      rtx x;

      x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
				       i * UNITS_PER_WORD);
      x = gen_rtx_CLOBBER (VOIDmode, x);
      emit_insn_after (x, insn);
    }

  /* Fix up any references to decomposed registers in the notes.  */
  resolve_reg_notes (insn);

  return true;
}
1147 
1148 /* A USE of a decomposed register is no longer meaningful.  Return
1149    whether we changed something.  */
1150 
1151 static bool
resolve_use(rtx pat,rtx insn)1152 resolve_use (rtx pat, rtx insn)
1153 {
1154   if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1155     {
1156       delete_insn (insn);
1157       return true;
1158     }
1159 
1160   resolve_reg_notes (insn);
1161 
1162   return false;
1163 }
1164 
/* A VAR_LOCATION can be simplified.  */

static void
resolve_debug (rtx insn)
{
  /* Replace uses of decomposed registers anywhere in the debug insn's
     pattern with the corresponding concatenations.  */
  for_each_rtx (&PATTERN (insn), adjust_decomposed_uses, NULL_RTX);

  df_insn_rescan (insn);

  resolve_reg_notes (insn);
}
1176 
/* Check if INSN is a decomposable multiword-shift or zero-extend and
   set the decomposable_context bitmap accordingly.  SPEED_P is true
   if we are optimizing INSN for speed rather than size.  Return true
   if INSN is decomposable.  */

static bool
find_decomposable_shift_zext (rtx insn, bool speed_p)
{
  rtx set;
  rtx op;
  rtx op_operand;

  set = single_set (insn);
  if (!set)
    return false;

  /* Only shifts and zero-extends are candidates.  */
  op = SET_SRC (set);
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
      && GET_CODE (op) != ASHIFTRT
      && GET_CODE (op) != ZERO_EXTEND)
    return false;

  /* Source and destination must both be pseudos, and the operation
     must be in the double-word mode this pass knows how to split.  */
  op_operand = XEXP (op, 0);
  if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
      || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
      || HARD_REGISTER_NUM_P (REGNO (op_operand))
      || GET_MODE (op) != twice_word_mode)
    return false;

  if (GET_CODE (op) == ZERO_EXTEND)
    {
      /* Only a zero_extend from word_mode can be lowered, and only
	 when the precomputed cost choice says it is worthwhile.  */
      if (GET_MODE (op_operand) != word_mode
	  || !choices[speed_p].splitting_zext)
	return false;
    }
  else /* left or right shift */
    {
      /* Pick the cost table matching this particular shift code.  */
      bool *splitting = (GET_CODE (op) == ASHIFT
			 ? choices[speed_p].splitting_ashift
			 : GET_CODE (op) == ASHIFTRT
			 ? choices[speed_p].splitting_ashiftrt
			 : choices[speed_p].splitting_lshiftrt);
      /* The shift count must be a constant of at least BITS_PER_WORD,
	 so that each result word is either zero, a sign copy, or comes
	 from a single (possibly shifted) word of the input, and the
	 table must mark this count as profitable.  */
      if (!CONST_INT_P (XEXP (op, 1))
	  || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
			2 * BITS_PER_WORD - 1)
	  || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
	return false;

      bitmap_set_bit (decomposable_context, REGNO (op_operand));
    }

  bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));

  return true;
}
1233 
/* Decompose a more than word wide shift (in INSN) of a multiword
   pseudo or a multiword zero-extend of a wordmode pseudo into a move
   and 'set to zero' insn.  Return a pointer to the new insn when a
   replacement was done.  */

static rtx
resolve_shift_zext (rtx insn)
{
  rtx set;
  rtx op;
  rtx op_operand;
  rtx insns;
  rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
  int src_reg_num, dest_reg_num, offset1, offset2, src_offset;

  set = single_set (insn);
  if (!set)
    return NULL_RTX;

  op = SET_SRC (set);
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
      && GET_CODE (op) != ASHIFTRT
      && GET_CODE (op) != ZERO_EXTEND)
    return NULL_RTX;

  op_operand = XEXP (op, 0);

  /* We can tear this operation apart only if the regs were already
     torn apart.  */
  if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
    return NULL_RTX;

  /* src_reg_num is the number of the word mode register which we
     are operating on.  For a left shift and a zero_extend on little
     endian machines this is register 0.  */
  src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
		? 1 : 0;

  /* On big-endian machines word numbering within a multiword value is
     reversed; a zero_extend source is a single word, so it is exempt.  */
  if (WORDS_BIG_ENDIAN
      && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
    src_reg_num = 1 - src_reg_num;

  if (GET_CODE (op) == ZERO_EXTEND)
    dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
  else
    dest_reg_num = 1 - src_reg_num;

  /* Byte offsets of the result word that receives the source
     (offset1), the other result word (offset2), and the source word
     within its multiword value.  */
  offset1 = UNITS_PER_WORD * dest_reg_num;
  offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
  src_offset = UNITS_PER_WORD * src_reg_num;

  start_sequence ();

  dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
                                          GET_MODE (SET_DEST (set)),
                                          offset1);
  dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
					    GET_MODE (SET_DEST (set)),
					    offset2);
  src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
                                         GET_MODE (op_operand),
                                         src_offset);
  /* For an arithmetic right shift the upper result word is the source
     word's sign bit smeared across the whole word; compute it here
     unless the shift count makes the two result words identical (that
     case is handled below without an extra shift).  */
  if (GET_CODE (op) == ASHIFTRT
      && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
    upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
			      BITS_PER_WORD - 1, NULL_RTX, 0);

  if (GET_CODE (op) != ZERO_EXTEND)
    {
      /* A count of exactly BITS_PER_WORD is a plain word move; larger
	 counts also need a residual word-mode shift.  */
      int shift_count = INTVAL (XEXP (op, 1));
      if (shift_count > BITS_PER_WORD)
	src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
				LSHIFT_EXPR : RSHIFT_EXPR,
				word_mode, src_reg,
				shift_count - BITS_PER_WORD,
				dest_reg, GET_CODE (op) != ASHIFTRT);
    }

  if (dest_reg != src_reg)
    emit_move_insn (dest_reg, src_reg);
  /* Fill the other result word: zero for logical shifts and
     zero_extend, sign copies for arithmetic right shifts.  */
  if (GET_CODE (op) != ASHIFTRT)
    emit_move_insn (dest_upper, CONST0_RTX (word_mode));
  else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
    emit_move_insn (dest_upper, copy_rtx (src_reg));
  else
    emit_move_insn (dest_upper, upper_src);
  insns = get_insns ();

  end_sequence ();

  emit_insn_before (insns, insn);

  if (dump_file)
    {
      rtx in;
      fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
      for (in = insns; in != insn; in = NEXT_INSN (in))
	fprintf (dump_file, "%d ", INSN_UID (in));
      fprintf (dump_file, "\n");
    }

  delete_insn (insn);
  return insns;
}
1339 
1340 /* Print to dump_file a description of what we're doing with shift code CODE.
1341    SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD.  */
1342 
1343 static void
dump_shift_choices(enum rtx_code code,bool * splitting)1344 dump_shift_choices (enum rtx_code code, bool *splitting)
1345 {
1346   int i;
1347   const char *sep;
1348 
1349   fprintf (dump_file,
1350 	   "  Splitting mode %s for %s lowering with shift amounts = ",
1351 	   GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1352   sep = "";
1353   for (i = 0; i < BITS_PER_WORD; i++)
1354     if (splitting[i])
1355       {
1356 	fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1357 	sep = ",";
1358       }
1359   fprintf (dump_file, "\n");
1360 }
1361 
1362 /* Print to dump_file a description of what we're doing when optimizing
1363    for speed or size; SPEED_P says which.  DESCRIPTION is a description
1364    of the SPEED_P choice.  */
1365 
1366 static void
dump_choices(bool speed_p,const char * description)1367 dump_choices (bool speed_p, const char *description)
1368 {
1369   unsigned int i;
1370 
1371   fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1372 
1373   for (i = 0; i < MAX_MACHINE_MODE; i++)
1374     if (GET_MODE_SIZE (i) > UNITS_PER_WORD)
1375       fprintf (dump_file, "  %s mode %s for copy lowering.\n",
1376 	       choices[speed_p].move_modes_to_split[i]
1377 	       ? "Splitting"
1378 	       : "Skipping",
1379 	       GET_MODE_NAME ((enum machine_mode) i));
1380 
1381   fprintf (dump_file, "  %s mode %s for zero_extend lowering.\n",
1382 	   choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1383 	   GET_MODE_NAME (twice_word_mode));
1384 
1385   dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1386   dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1387   dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
1388   fprintf (dump_file, "\n");
1389 }
1390 
/* Look for registers which are always accessed via word-sized SUBREGs
   or -if DECOMPOSE_COPIES is true- via copies.  Decompose these
   registers into several word-sized pseudo-registers.  */

static void
decompose_multiword_subregs (bool decompose_copies)
{
  unsigned int max;
  basic_block bb;
  bool speed_p;

  if (dump_file)
    {
      dump_choices (false, "size");
      dump_choices (true, "speed");
    }

  /* Check if this target even has any modes to consider lowering.   */
  if (!choices[false].something_to_do && !choices[true].something_to_do)
    {
      if (dump_file)
	fprintf (dump_file, "Nothing to do!\n");
      return;
    }

  max = max_reg_num ();

  /* First see if there are any multi-word pseudo-registers.  If there
     aren't, there is nothing we can do.  This should speed up this
     pass in the normal case, since it should be faster than scanning
     all the insns.  */
  {
    unsigned int i;
    bool useful_modes_seen = false;

    for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
      if (regno_reg_rtx[i] != NULL)
	{
	  enum machine_mode mode = GET_MODE (regno_reg_rtx[i]);
	  if (choices[false].move_modes_to_split[(int) mode]
	      || choices[true].move_modes_to_split[(int) mode])
	    {
	      useful_modes_seen = true;
	      break;
	    }
	}

    if (!useful_modes_seen)
      {
	if (dump_file)
	  fprintf (dump_file, "Nothing to lower in this function.\n");
	return;
      }
  }

  if (df)
    {
      df_set_flags (DF_DEFER_INSN_RESCAN);
      run_word_dce ();
    }

  /* FIXME: It may be possible to change this code to look for each
     multi-word pseudo-register and to find each insn which sets or
     uses that register.  That should be faster than scanning all the
     insns.  */

  decomposable_context = BITMAP_ALLOC (NULL);
  non_decomposable_context = BITMAP_ALLOC (NULL);
  subreg_context = BITMAP_ALLOC (NULL);

  reg_copy_graph.create (max);
  reg_copy_graph.safe_grow_cleared (max);
  memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);

  speed_p = optimize_function_for_speed_p (cfun);

  /* First scan: classify every insn and fill in the decomposable /
     non-decomposable bitmaps and the pseudo-copy graph.  */
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx insn;

      FOR_BB_INSNS (bb, insn)
	{
	  rtx set;
	  enum classify_move_insn cmi;
	  int i, n;

	  if (!INSN_P (insn)
	      || GET_CODE (PATTERN (insn)) == CLOBBER
	      || GET_CODE (PATTERN (insn)) == USE)
	    continue;

	  recog_memoized (insn);

	  if (find_decomposable_shift_zext (insn, speed_p))
	    continue;

	  extract_insn (insn);

	  set = simple_move (insn, speed_p);

	  if (!set)
	    cmi = NOT_SIMPLE_MOVE;
	  else
	    {
	      /* We mark pseudo-to-pseudo copies as decomposable during the
		 second pass only.  The first pass is so early that there is
		 good chance such moves will be optimized away completely by
		 subsequent optimizations anyway.

		 However, we call find_pseudo_copy even during the first pass
		 so as to properly set up the reg_copy_graph.  */
	      if (find_pseudo_copy (set))
		cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
	      else
		cmi = SIMPLE_MOVE;
	    }

	  n = recog_data.n_operands;
	  for (i = 0; i < n; ++i)
	    {
	      for_each_rtx (&recog_data.operand[i],
			    find_decomposable_subregs,
			    &cmi);

	      /* We handle ASM_OPERANDS as a special case to support
		 things like x86 rdtsc which returns a DImode value.
		 We can decompose the output, which will certainly be
		 operand 0, but not the inputs.  */

	      if (cmi == SIMPLE_MOVE
		  && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
		{
		  gcc_assert (i == 0);
		  cmi = NOT_SIMPLE_MOVE;
		}
	    }
	}
    }

  /* A pseudo seen in a non-decomposable context must stay whole.  */
  bitmap_and_compl_into (decomposable_context, non_decomposable_context);
  if (!bitmap_empty_p (decomposable_context))
    {
      sbitmap sub_blocks;
      unsigned int i;
      sbitmap_iterator sbi;
      bitmap_iterator iter;
      unsigned int regno;

      propagate_pseudo_copies ();

      /* Blocks that end up with control flow insns in their middle and
	 must be split afterwards.  */
      sub_blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
      bitmap_clear (sub_blocks);

      EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
	decompose_register (regno);

      /* Second scan: rewrite every insn that refers to a decomposed
	 register.  */
      FOR_EACH_BB_FN (bb, cfun)
	{
	  rtx insn;

	  FOR_BB_INSNS (bb, insn)
	    {
	      rtx pat;

	      if (!INSN_P (insn))
		continue;

	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == CLOBBER)
		resolve_clobber (pat, insn);
	      else if (GET_CODE (pat) == USE)
		resolve_use (pat, insn);
	      else if (DEBUG_INSN_P (insn))
		resolve_debug (insn);
	      else
		{
		  rtx set;
		  int i;

		  recog_memoized (insn);
		  extract_insn (insn);

		  set = simple_move (insn, speed_p);
		  if (set)
		    {
		      rtx orig_insn = insn;
		      bool cfi = control_flow_insn_p (insn);

		      /* We can end up splitting loads to multi-word pseudos
			 into separate loads to machine word size pseudos.
			 When this happens, we first had one load that can
			 throw, and after resolve_simple_move we'll have a
			 bunch of loads (at least two).  All those loads may
			 trap if we can have non-call exceptions, so they
			 all will end the current basic block.  We split the
			 block after the outer loop over all insns, but we
			 make sure here that we will be able to split the
			 basic block and still produce the correct control
			 flow graph for it.  */
		      gcc_assert (!cfi
				  || (cfun->can_throw_non_call_exceptions
				      && can_throw_internal (insn)));

		      insn = resolve_simple_move (set, insn);
		      if (insn != orig_insn)
			{
			  recog_memoized (insn);
			  extract_insn (insn);

			  if (cfi)
			    bitmap_set_bit (sub_blocks, bb->index);
			}
		    }
		  else
		    {
		      rtx decomposed_shift;

		      decomposed_shift = resolve_shift_zext (insn);
		      if (decomposed_shift != NULL_RTX)
			{
			  insn = decomposed_shift;
			  recog_memoized (insn);
			  extract_insn (insn);
			}
		    }

		  /* Replace any remaining subreg uses of decomposed
		     registers in the operands.  */
		  for (i = recog_data.n_operands - 1; i >= 0; --i)
		    for_each_rtx (recog_data.operand_loc[i],
				  resolve_subreg_use,
				  insn);

		  resolve_reg_notes (insn);

		  if (num_validated_changes () > 0)
		    {
		      /* Keep matched duplicate operands in sync with the
			 operands they duplicate before committing.  */
		      for (i = recog_data.n_dups - 1; i >= 0; --i)
			{
			  rtx *pl = recog_data.dup_loc[i];
			  int dup_num = recog_data.dup_num[i];
			  rtx *px = recog_data.operand_loc[dup_num];

			  validate_unshare_change (insn, pl, *px, 1);
			}

		      i = apply_change_group ();
		      gcc_assert (i);
		    }
		}
	    }
	}

      /* If we had insns to split that caused control flow insns in the middle
	 of a basic block, split those blocks now.  Note that we only handle
	 the case where splitting a load has caused multiple possibly trapping
	 loads to appear.  */
      EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
	{
	  rtx insn, end;
	  edge fallthru;

	  bb = BASIC_BLOCK_FOR_FN (cfun, i);
	  insn = BB_HEAD (bb);
	  end = BB_END (bb);

	  while (insn != end)
	    {
	      if (control_flow_insn_p (insn))
		{
		  /* Split the block after insn.  There will be a fallthru
		     edge, which is OK so we keep it.  We have to create the
		     exception edges ourselves.  */
		  fallthru = split_block (bb, insn);
		  rtl_make_eh_edge (NULL, bb, BB_END (bb));
		  bb = fallthru->dest;
		  insn = BB_HEAD (bb);
		}
	      else
	        insn = NEXT_INSN (insn);
	    }
	}

      sbitmap_free (sub_blocks);
    }

  /* Release the per-pseudo copy bitmaps and the graph itself.  */
  {
    unsigned int i;
    bitmap b;

    FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
      if (b)
	BITMAP_FREE (b);
  }

  reg_copy_graph.release ();

  BITMAP_FREE (decomposable_context);
  BITMAP_FREE (non_decomposable_context);
  BITMAP_FREE (subreg_context);
}
1689 
/* Gate function for lower subreg pass.  Both passes are controlled by
   the -fsplit-wide-types flag.  */

static bool
gate_handle_lower_subreg (void)
{
  return flag_split_wide_types != 0;
}
1697 
/* Implement first lower subreg pass.  This early pass does not
   decompose plain pseudo-to-pseudo copies (see the comment in
   decompose_multiword_subregs), since later optimizations are likely
   to remove such moves anyway.  */

static unsigned int
rest_of_handle_lower_subreg (void)
{
  decompose_multiword_subregs (false);
  return 0;
}
1706 
/* Implement second lower subreg pass.  Unlike the first pass, this
   one also treats pseudo-to-pseudo copies as decomposable.  */

static unsigned int
rest_of_handle_lower_subreg2 (void)
{
  decompose_multiword_subregs (true);
  return 0;
}
1715 
namespace {

/* Metadata for the first lower subreg pass ("subreg1").  */

const pass_data pass_data_lower_subreg =
{
  RTL_PASS, /* type */
  "subreg1", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  true, /* has_gate */
  true, /* has_execute */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_verify_flow, /* todo_flags_finish */
};

/* Pass object for the first lower subreg pass; dispatches to the
   gate and execute helpers above.  */

class pass_lower_subreg : public rtl_opt_pass
{
public:
  pass_lower_subreg (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg, ctxt)
  {}

  /* opt_pass methods: */
  bool gate () { return gate_handle_lower_subreg (); }
  unsigned int execute () { return rest_of_handle_lower_subreg (); }

}; // class pass_lower_subreg

} // anon namespace
1747 
/* Factory function for the first lower subreg pass.  */

rtl_opt_pass *
make_pass_lower_subreg (gcc::context *ctxt)
{
  return new pass_lower_subreg (ctxt);
}
1753 
namespace {

/* Metadata for the second lower subreg pass ("subreg2"), which runs
   late enough to finish dataflow and verify RTL sharing.  */

const pass_data pass_data_lower_subreg2 =
{
  RTL_PASS, /* type */
  "subreg2", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  true, /* has_gate */
  true, /* has_execute */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_df_finish | TODO_verify_rtl_sharing
    | TODO_verify_flow ), /* todo_flags_finish */
};

/* Pass object for the second lower subreg pass; dispatches to the
   gate and execute helpers above.  */

class pass_lower_subreg2 : public rtl_opt_pass
{
public:
  pass_lower_subreg2 (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
  {}

  /* opt_pass methods: */
  bool gate () { return gate_handle_lower_subreg (); }
  unsigned int execute () { return rest_of_handle_lower_subreg2 (); }

}; // class pass_lower_subreg2

} // anon namespace
1786 
/* Factory function for the second lower subreg pass.  */

rtl_opt_pass *
make_pass_lower_subreg2 (gcc::context *ctxt)
{
  return new pass_lower_subreg2 (ctxt);
}
1792