1 /* Decompose multiword subregs.
2    Copyright (C) 2007-2013 Free Software Foundation, Inc.
3    Contributed by Richard Henderson <rth@redhat.com>
4 		  Ian Lance Taylor <iant@google.com>
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12 
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "machmode.h"
26 #include "tm.h"
27 #include "rtl.h"
28 #include "tm_p.h"
29 #include "flags.h"
30 #include "insn-config.h"
31 #include "obstack.h"
32 #include "basic-block.h"
33 #include "recog.h"
34 #include "bitmap.h"
35 #include "dce.h"
36 #include "expr.h"
37 #include "except.h"
38 #include "regs.h"
39 #include "tree-pass.h"
40 #include "df.h"
41 #include "lower-subreg.h"
42 
43 #ifdef STACK_GROWS_DOWNWARD
44 # undef STACK_GROWS_DOWNWARD
45 # define STACK_GROWS_DOWNWARD 1
46 #else
47 # define STACK_GROWS_DOWNWARD 0
48 #endif
49 
50 
51 /* Decompose multi-word pseudo-registers into individual
52    pseudo-registers when possible and profitable.  This is possible
53    when all the uses of a multi-word register are via SUBREG, or are
54    copies of the register to another location.  Breaking apart the
55    register permits more CSE and permits better register allocation.
56    This is profitable if the machine does not have move instructions
57    to do this.
58 
59    This pass only splits moves with modes that are wider than
60    word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
61    integer modes that are twice the width of word_mode.  The latter
62    could be generalized if there was a need to do this, but the trend in
63    architectures is to not need this.
64 
65    There are two useful preprocessor defines for use by maintainers:
66 
67    #define LOG_COSTS 1
68 
69    if you wish to see the actual cost estimates that are being used
70    for each mode wider than word mode and the cost estimates for zero
71    extension and the shifts.   This can be useful when port maintainers
72    are tuning insn rtx costs.
73 
74    #define FORCE_LOWERING 1
75 
76    if you wish to test the pass with all the transformation forced on.
77    This can be useful for finding bugs in the transformations.  */
78 
79 #define LOG_COSTS 0
80 #define FORCE_LOWERING 0
81 
82 /* Bit N in this bitmap is set if regno N is used in a context in
83    which we can decompose it.  */
84 static bitmap decomposable_context;
85 
86 /* Bit N in this bitmap is set if regno N is used in a context in
87    which it can not be decomposed.  */
88 static bitmap non_decomposable_context;
89 
90 /* Bit N in this bitmap is set if regno N is used in a subreg
91    which changes the mode but not the size.  This typically happens
92    when the register accessed as a floating-point value; we want to
93    avoid generating accesses to its subwords in integer modes.  */
94 static bitmap subreg_context;
95 
96 /* Bit N in the bitmap in element M of this array is set if there is a
97    copy from reg M to reg N.  */
98 static vec<bitmap> reg_copy_graph;
99 
100 struct target_lower_subreg default_target_lower_subreg;
101 #if SWITCHABLE_TARGET
102 struct target_lower_subreg *this_target_lower_subreg
103   = &default_target_lower_subreg;
104 #endif
105 
106 #define twice_word_mode \
107   this_target_lower_subreg->x_twice_word_mode
108 #define choices \
109   this_target_lower_subreg->x_choices
110 
111 /* RTXes used while computing costs.  */
112 struct cost_rtxes {
113   /* Source and target registers.  */
114   rtx source;
115   rtx target;
116 
117   /* A twice_word_mode ZERO_EXTEND of SOURCE.  */
118   rtx zext;
119 
120   /* A shift of SOURCE.  */
121   rtx shift;
122 
123   /* A SET of TARGET.  */
124   rtx set;
125 };
126 
127 /* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
128    rtxes in RTXES.  SPEED_P selects between the speed and size cost.  */
129 
130 static int
shift_cost(bool speed_p,struct cost_rtxes * rtxes,enum rtx_code code,enum machine_mode mode,int op1)131 shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
132 	    enum machine_mode mode, int op1)
133 {
134   PUT_CODE (rtxes->shift, code);
135   PUT_MODE (rtxes->shift, mode);
136   PUT_MODE (rtxes->source, mode);
137   XEXP (rtxes->shift, 1) = GEN_INT (op1);
138   return set_src_cost (rtxes->shift, speed_p);
139 }
140 
141 /* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
142    to true if it is profitable to split a double-word CODE shift
143    of X + BITS_PER_WORD bits.  SPEED_P says whether we are testing
144    for speed or size profitability.
145 
146    Use the rtxes in RTXES to calculate costs.  WORD_MOVE_ZERO_COST is
147    the cost of moving zero into a word-mode register.  WORD_MOVE_COST
148    is the cost of moving between word registers.  */
149 
150 static void
compute_splitting_shift(bool speed_p,struct cost_rtxes * rtxes,bool * splitting,enum rtx_code code,int word_move_zero_cost,int word_move_cost)151 compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
152 			 bool *splitting, enum rtx_code code,
153 			 int word_move_zero_cost, int word_move_cost)
154 {
155   int wide_cost, narrow_cost, upper_cost, i;
156 
157   for (i = 0; i < BITS_PER_WORD; i++)
158     {
159       wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
160 			      i + BITS_PER_WORD);
161       if (i == 0)
162 	narrow_cost = word_move_cost;
163       else
164 	narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
165 
166       if (code != ASHIFTRT)
167 	upper_cost = word_move_zero_cost;
168       else if (i == BITS_PER_WORD - 1)
169 	upper_cost = word_move_cost;
170       else
171 	upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
172 				 BITS_PER_WORD - 1);
173 
174       if (LOG_COSTS)
175 	fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
176 		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
177 		 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);
178 
179       if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
180 	splitting[i] = true;
181     }
182 }
183 
184 /* Compute what we should do when optimizing for speed or size; SPEED_P
185    selects which.  Use RTXES for computing costs.  */
186 
187 static void
compute_costs(bool speed_p,struct cost_rtxes * rtxes)188 compute_costs (bool speed_p, struct cost_rtxes *rtxes)
189 {
190   unsigned int i;
191   int word_move_zero_cost, word_move_cost;
192 
193   PUT_MODE (rtxes->target, word_mode);
194   SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
195   word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);
196 
197   SET_SRC (rtxes->set) = rtxes->source;
198   word_move_cost = set_rtx_cost (rtxes->set, speed_p);
199 
200   if (LOG_COSTS)
201     fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
202 	     GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);
203 
204   for (i = 0; i < MAX_MACHINE_MODE; i++)
205     {
206       enum machine_mode mode = (enum machine_mode) i;
207       int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
208       if (factor > 1)
209 	{
210 	  int mode_move_cost;
211 
212 	  PUT_MODE (rtxes->target, mode);
213 	  PUT_MODE (rtxes->source, mode);
214 	  mode_move_cost = set_rtx_cost (rtxes->set, speed_p);
215 
216 	  if (LOG_COSTS)
217 	    fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
218 		     GET_MODE_NAME (mode), mode_move_cost,
219 		     word_move_cost, factor);
220 
221 	  if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
222 	    {
223 	      choices[speed_p].move_modes_to_split[i] = true;
224 	      choices[speed_p].something_to_do = true;
225 	    }
226 	}
227     }
228 
229   /* For the moves and shifts, the only case that is checked is one
230      where the mode of the target is an integer mode twice the width
231      of the word_mode.
232 
233      If it is not profitable to split a double word move then do not
234      even consider the shifts or the zero extension.  */
235   if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
236     {
237       int zext_cost;
238 
239       /* The only case here to check to see if moving the upper part with a
240 	 zero is cheaper than doing the zext itself.  */
241       PUT_MODE (rtxes->source, word_mode);
242       zext_cost = set_src_cost (rtxes->zext, speed_p);
243 
244       if (LOG_COSTS)
245 	fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
246 		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
247 		 zext_cost, word_move_cost, word_move_zero_cost);
248 
249       if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
250 	choices[speed_p].splitting_zext = true;
251 
252       compute_splitting_shift (speed_p, rtxes,
253 			       choices[speed_p].splitting_ashift, ASHIFT,
254 			       word_move_zero_cost, word_move_cost);
255       compute_splitting_shift (speed_p, rtxes,
256 			       choices[speed_p].splitting_lshiftrt, LSHIFTRT,
257 			       word_move_zero_cost, word_move_cost);
258       compute_splitting_shift (speed_p, rtxes,
259 			       choices[speed_p].splitting_ashiftrt, ASHIFTRT,
260 			       word_move_zero_cost, word_move_cost);
261     }
262 }
263 
264 /* Do one-per-target initialisation.  This involves determining
265    which operations on the machine are profitable.  If none are found,
266    then the pass just returns when called.  */
267 
268 void
init_lower_subreg(void)269 init_lower_subreg (void)
270 {
271   struct cost_rtxes rtxes;
272 
273   memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));
274 
275   twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode);
276 
277   rtxes.target = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER);
278   rtxes.source = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER + 1);
279   rtxes.set = gen_rtx_SET (VOIDmode, rtxes.target, rtxes.source);
280   rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
281   rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);
282 
283   if (LOG_COSTS)
284     fprintf (stderr, "\nSize costs\n==========\n\n");
285   compute_costs (false, &rtxes);
286 
287   if (LOG_COSTS)
288     fprintf (stderr, "\nSpeed costs\n===========\n\n");
289   compute_costs (true, &rtxes);
290 }
291 
292 static bool
simple_move_operand(rtx x)293 simple_move_operand (rtx x)
294 {
295   if (GET_CODE (x) == SUBREG)
296     x = SUBREG_REG (x);
297 
298   if (!OBJECT_P (x))
299     return false;
300 
301   if (GET_CODE (x) == LABEL_REF
302       || GET_CODE (x) == SYMBOL_REF
303       || GET_CODE (x) == HIGH
304       || GET_CODE (x) == CONST)
305     return false;
306 
307   if (MEM_P (x)
308       && (MEM_VOLATILE_P (x)
309 	  || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
310     return false;
311 
312   return true;
313 }
314 
315 /* If INSN is a single set between two objects that we want to split,
316    return the single set.  SPEED_P says whether we are optimizing
317    INSN for speed or size.
318 
319    INSN should have been passed to recog and extract_insn before this
320    is called.  */
321 
322 static rtx
simple_move(rtx insn,bool speed_p)323 simple_move (rtx insn, bool speed_p)
324 {
325   rtx x;
326   rtx set;
327   enum machine_mode mode;
328 
329   if (recog_data.n_operands != 2)
330     return NULL_RTX;
331 
332   set = single_set (insn);
333   if (!set)
334     return NULL_RTX;
335 
336   x = SET_DEST (set);
337   if (x != recog_data.operand[0] && x != recog_data.operand[1])
338     return NULL_RTX;
339   if (!simple_move_operand (x))
340     return NULL_RTX;
341 
342   x = SET_SRC (set);
343   if (x != recog_data.operand[0] && x != recog_data.operand[1])
344     return NULL_RTX;
345   /* For the src we can handle ASM_OPERANDS, and it is beneficial for
346      things like x86 rdtsc which returns a DImode value.  */
347   if (GET_CODE (x) != ASM_OPERANDS
348       && !simple_move_operand (x))
349     return NULL_RTX;
350 
351   /* We try to decompose in integer modes, to avoid generating
352      inefficient code copying between integer and floating point
353      registers.  That means that we can't decompose if this is a
354      non-integer mode for which there is no integer mode of the same
355      size.  */
356   mode = GET_MODE (SET_DEST (set));
357   if (!SCALAR_INT_MODE_P (mode)
358       && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0)
359 	  == BLKmode))
360     return NULL_RTX;
361 
362   /* Reject PARTIAL_INT modes.  They are used for processor specific
363      purposes and it's probably best not to tamper with them.  */
364   if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
365     return NULL_RTX;
366 
367   if (!choices[speed_p].move_modes_to_split[(int) mode])
368     return NULL_RTX;
369 
370   return set;
371 }
372 
373 /* If SET is a copy from one multi-word pseudo-register to another,
374    record that in reg_copy_graph.  Return whether it is such a
375    copy.  */
376 
377 static bool
find_pseudo_copy(rtx set)378 find_pseudo_copy (rtx set)
379 {
380   rtx dest = SET_DEST (set);
381   rtx src = SET_SRC (set);
382   unsigned int rd, rs;
383   bitmap b;
384 
385   if (!REG_P (dest) || !REG_P (src))
386     return false;
387 
388   rd = REGNO (dest);
389   rs = REGNO (src);
390   if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
391     return false;
392 
393   b = reg_copy_graph[rs];
394   if (b == NULL)
395     {
396       b = BITMAP_ALLOC (NULL);
397       reg_copy_graph[rs] = b;
398     }
399 
400   bitmap_set_bit (b, rd);
401 
402   return true;
403 }
404 
405 /* Look through the registers in DECOMPOSABLE_CONTEXT.  For each case
406    where they are copied to another register, add the register to
407    which they are copied to DECOMPOSABLE_CONTEXT.  Use
408    NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
409    copies of registers which are in NON_DECOMPOSABLE_CONTEXT.  */
410 
411 static void
propagate_pseudo_copies(void)412 propagate_pseudo_copies (void)
413 {
414   bitmap queue, propagate;
415 
416   queue = BITMAP_ALLOC (NULL);
417   propagate = BITMAP_ALLOC (NULL);
418 
419   bitmap_copy (queue, decomposable_context);
420   do
421     {
422       bitmap_iterator iter;
423       unsigned int i;
424 
425       bitmap_clear (propagate);
426 
427       EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
428 	{
429 	  bitmap b = reg_copy_graph[i];
430 	  if (b)
431 	    bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
432 	}
433 
434       bitmap_and_compl (queue, propagate, decomposable_context);
435       bitmap_ior_into (decomposable_context, propagate);
436     }
437   while (!bitmap_empty_p (queue));
438 
439   BITMAP_FREE (queue);
440   BITMAP_FREE (propagate);
441 }
442 
/* Classification of the insn being scanned; a pointer to one of these
   values is handed to find_decomposable_subregs via for_each_rtx.  */

enum classify_move_insn
{
  /* Not a straightforward copy from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* A simple move that we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* Any other simple move.  */
  SIMPLE_MOVE
};
455 
456 /* This is called via for_each_rtx.  If we find a SUBREG which we
457    could use to decompose a pseudo-register, set a bit in
458    DECOMPOSABLE_CONTEXT.  If we find an unadorned register which is
459    not a simple pseudo-register copy, DATA will point at the type of
460    move, and we set a bit in DECOMPOSABLE_CONTEXT or
461    NON_DECOMPOSABLE_CONTEXT as appropriate.  */
462 
463 static int
find_decomposable_subregs(rtx * px,void * data)464 find_decomposable_subregs (rtx *px, void *data)
465 {
466   enum classify_move_insn *pcmi = (enum classify_move_insn *) data;
467   rtx x = *px;
468 
469   if (x == NULL_RTX)
470     return 0;
471 
472   if (GET_CODE (x) == SUBREG)
473     {
474       rtx inner = SUBREG_REG (x);
475       unsigned int regno, outer_size, inner_size, outer_words, inner_words;
476 
477       if (!REG_P (inner))
478 	return 0;
479 
480       regno = REGNO (inner);
481       if (HARD_REGISTER_NUM_P (regno))
482 	return -1;
483 
484       outer_size = GET_MODE_SIZE (GET_MODE (x));
485       inner_size = GET_MODE_SIZE (GET_MODE (inner));
486       outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
487       inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
488 
489       /* We only try to decompose single word subregs of multi-word
490 	 registers.  When we find one, we return -1 to avoid iterating
491 	 over the inner register.
492 
493 	 ??? This doesn't allow, e.g., DImode subregs of TImode values
494 	 on 32-bit targets.  We would need to record the way the
495 	 pseudo-register was used, and only decompose if all the uses
496 	 were the same number and size of pieces.  Hopefully this
497 	 doesn't happen much.  */
498 
499       if (outer_words == 1 && inner_words > 1)
500 	{
501 	  bitmap_set_bit (decomposable_context, regno);
502 	  return -1;
503 	}
504 
505       /* If this is a cast from one mode to another, where the modes
506 	 have the same size, and they are not tieable, then mark this
507 	 register as non-decomposable.  If we decompose it we are
508 	 likely to mess up whatever the backend is trying to do.  */
509       if (outer_words > 1
510 	  && outer_size == inner_size
511 	  && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner)))
512 	{
513 	  bitmap_set_bit (non_decomposable_context, regno);
514 	  bitmap_set_bit (subreg_context, regno);
515 	  return -1;
516 	}
517     }
518   else if (REG_P (x))
519     {
520       unsigned int regno;
521 
522       /* We will see an outer SUBREG before we see the inner REG, so
523 	 when we see a plain REG here it means a direct reference to
524 	 the register.
525 
526 	 If this is not a simple copy from one location to another,
527 	 then we can not decompose this register.  If this is a simple
528 	 copy we want to decompose, and the mode is right,
529 	 then we mark the register as decomposable.
530 	 Otherwise we don't say anything about this register --
531 	 it could be decomposed, but whether that would be
532 	 profitable depends upon how it is used elsewhere.
533 
534 	 We only set bits in the bitmap for multi-word
535 	 pseudo-registers, since those are the only ones we care about
536 	 and it keeps the size of the bitmaps down.  */
537 
538       regno = REGNO (x);
539       if (!HARD_REGISTER_NUM_P (regno)
540 	  && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
541 	{
542 	  switch (*pcmi)
543 	    {
544 	    case NOT_SIMPLE_MOVE:
545 	      bitmap_set_bit (non_decomposable_context, regno);
546 	      break;
547 	    case DECOMPOSABLE_SIMPLE_MOVE:
548 	      if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
549 		bitmap_set_bit (decomposable_context, regno);
550 	      break;
551 	    case SIMPLE_MOVE:
552 	      break;
553 	    default:
554 	      gcc_unreachable ();
555 	    }
556 	}
557     }
558   else if (MEM_P (x))
559     {
560       enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;
561 
562       /* Any registers used in a MEM do not participate in a
563 	 SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE.  Do our own recursion
564 	 here, and return -1 to block the parent's recursion.  */
565       for_each_rtx (&XEXP (x, 0), find_decomposable_subregs, &cmi_mem);
566       return -1;
567     }
568 
569   return 0;
570 }
571 
572 /* Decompose REGNO into word-sized components.  We smash the REG node
573    in place.  This ensures that (1) something goes wrong quickly if we
574    fail to make some replacement, and (2) the debug information inside
575    the symbol table is automatically kept up to date.  */
576 
577 static void
decompose_register(unsigned int regno)578 decompose_register (unsigned int regno)
579 {
580   rtx reg;
581   unsigned int words, i;
582   rtvec v;
583 
584   reg = regno_reg_rtx[regno];
585 
586   regno_reg_rtx[regno] = NULL_RTX;
587 
588   words = GET_MODE_SIZE (GET_MODE (reg));
589   words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
590 
591   v = rtvec_alloc (words);
592   for (i = 0; i < words; ++i)
593     RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
594 
595   PUT_CODE (reg, CONCATN);
596   XVEC (reg, 0) = v;
597 
598   if (dump_file)
599     {
600       fprintf (dump_file, "; Splitting reg %u ->", regno);
601       for (i = 0; i < words; ++i)
602 	fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
603       fputc ('\n', dump_file);
604     }
605 }
606 
607 /* Get a SUBREG of a CONCATN.  */
608 
609 static rtx
simplify_subreg_concatn(enum machine_mode outermode,rtx op,unsigned int byte)610 simplify_subreg_concatn (enum machine_mode outermode, rtx op,
611 			 unsigned int byte)
612 {
613   unsigned int inner_size;
614   enum machine_mode innermode, partmode;
615   rtx part;
616   unsigned int final_offset;
617 
618   gcc_assert (GET_CODE (op) == CONCATN);
619   gcc_assert (byte % GET_MODE_SIZE (outermode) == 0);
620 
621   innermode = GET_MODE (op);
622   gcc_assert (byte < GET_MODE_SIZE (innermode));
623   gcc_assert (GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (innermode));
624 
625   inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
626   part = XVECEXP (op, 0, byte / inner_size);
627   partmode = GET_MODE (part);
628 
629   /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
630      regular CONST_VECTORs.  They have vector or integer modes, depending
631      on the capabilities of the target.  Cope with them.  */
632   if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
633     partmode = GET_MODE_INNER (innermode);
634   else if (partmode == VOIDmode)
635     {
636       enum mode_class mclass = GET_MODE_CLASS (innermode);
637       partmode = mode_for_size (inner_size * BITS_PER_UNIT, mclass, 0);
638     }
639 
640   final_offset = byte % inner_size;
641   if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
642     return NULL_RTX;
643 
644   return simplify_gen_subreg (outermode, part, partmode, final_offset);
645 }
646 
647 /* Wrapper around simplify_gen_subreg which handles CONCATN.  */
648 
649 static rtx
simplify_gen_subreg_concatn(enum machine_mode outermode,rtx op,enum machine_mode innermode,unsigned int byte)650 simplify_gen_subreg_concatn (enum machine_mode outermode, rtx op,
651 			     enum machine_mode innermode, unsigned int byte)
652 {
653   rtx ret;
654 
655   /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
656      If OP is a SUBREG of a CONCATN, then it must be a simple mode
657      change with the same size and offset 0, or it must extract a
658      part.  We shouldn't see anything else here.  */
659   if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
660     {
661       rtx op2;
662 
663       if ((GET_MODE_SIZE (GET_MODE (op))
664 	   == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
665 	  && SUBREG_BYTE (op) == 0)
666 	return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
667 					    GET_MODE (SUBREG_REG (op)), byte);
668 
669       op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
670 				     SUBREG_BYTE (op));
671       if (op2 == NULL_RTX)
672 	{
673 	  /* We don't handle paradoxical subregs here.  */
674 	  gcc_assert (GET_MODE_SIZE (outermode)
675 		      <= GET_MODE_SIZE (GET_MODE (op)));
676 	  gcc_assert (GET_MODE_SIZE (GET_MODE (op))
677 		      <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))));
678 	  op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
679 					 byte + SUBREG_BYTE (op));
680 	  gcc_assert (op2 != NULL_RTX);
681 	  return op2;
682 	}
683 
684       op = op2;
685       gcc_assert (op != NULL_RTX);
686       gcc_assert (innermode == GET_MODE (op));
687     }
688 
689   if (GET_CODE (op) == CONCATN)
690     return simplify_subreg_concatn (outermode, op, byte);
691 
692   ret = simplify_gen_subreg (outermode, op, innermode, byte);
693 
694   /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
695      resolve_simple_move will ask for the high part of the paradoxical
696      subreg, which does not have a value.  Just return a zero.  */
697   if (ret == NULL_RTX
698       && GET_CODE (op) == SUBREG
699       && SUBREG_BYTE (op) == 0
700       && (GET_MODE_SIZE (innermode)
701 	  > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))))
702     return CONST0_RTX (outermode);
703 
704   gcc_assert (ret != NULL_RTX);
705   return ret;
706 }
707 
708 /* Return whether we should resolve X into the registers into which it
709    was decomposed.  */
710 
711 static bool
resolve_reg_p(rtx x)712 resolve_reg_p (rtx x)
713 {
714   return GET_CODE (x) == CONCATN;
715 }
716 
717 /* Return whether X is a SUBREG of a register which we need to
718    resolve.  */
719 
720 static bool
resolve_subreg_p(rtx x)721 resolve_subreg_p (rtx x)
722 {
723   if (GET_CODE (x) != SUBREG)
724     return false;
725   return resolve_reg_p (SUBREG_REG (x));
726 }
727 
728 /* This is called via for_each_rtx.  Look for SUBREGs which need to be
729    decomposed.  */
730 
731 static int
resolve_subreg_use(rtx * px,void * data)732 resolve_subreg_use (rtx *px, void *data)
733 {
734   rtx insn = (rtx) data;
735   rtx x = *px;
736 
737   if (x == NULL_RTX)
738     return 0;
739 
740   if (resolve_subreg_p (x))
741     {
742       x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
743 				   SUBREG_BYTE (x));
744 
745       /* It is possible for a note to contain a reference which we can
746 	 decompose.  In this case, return 1 to the caller to indicate
747 	 that the note must be removed.  */
748       if (!x)
749 	{
750 	  gcc_assert (!insn);
751 	  return 1;
752 	}
753 
754       validate_change (insn, px, x, 1);
755       return -1;
756     }
757 
758   if (resolve_reg_p (x))
759     {
760       /* Return 1 to the caller to indicate that we found a direct
761 	 reference to a register which is being decomposed.  This can
762 	 happen inside notes, multiword shift or zero-extend
763 	 instructions.  */
764       return 1;
765     }
766 
767   return 0;
768 }
769 
770 /* This is called via for_each_rtx.  Look for SUBREGs which can be
771    decomposed and decomposed REGs that need copying.  */
772 
773 static int
adjust_decomposed_uses(rtx * px,void * data ATTRIBUTE_UNUSED)774 adjust_decomposed_uses (rtx *px, void *data ATTRIBUTE_UNUSED)
775 {
776   rtx x = *px;
777 
778   if (x == NULL_RTX)
779     return 0;
780 
781   if (resolve_subreg_p (x))
782     {
783       x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
784 				   SUBREG_BYTE (x));
785 
786       if (x)
787 	*px = x;
788       else
789 	x = copy_rtx (*px);
790     }
791 
792   if (resolve_reg_p (x))
793     *px = copy_rtx (x);
794 
795   return 0;
796 }
797 
798 /* Resolve any decomposed registers which appear in register notes on
799    INSN.  */
800 
801 static void
resolve_reg_notes(rtx insn)802 resolve_reg_notes (rtx insn)
803 {
804   rtx *pnote, note;
805 
806   note = find_reg_equal_equiv_note (insn);
807   if (note)
808     {
809       int old_count = num_validated_changes ();
810       if (for_each_rtx (&XEXP (note, 0), resolve_subreg_use, NULL))
811 	remove_note (insn, note);
812       else
813 	if (old_count != num_validated_changes ())
814 	  df_notes_rescan (insn);
815     }
816 
817   pnote = &REG_NOTES (insn);
818   while (*pnote != NULL_RTX)
819     {
820       bool del = false;
821 
822       note = *pnote;
823       switch (REG_NOTE_KIND (note))
824 	{
825 	case REG_DEAD:
826 	case REG_UNUSED:
827 	  if (resolve_reg_p (XEXP (note, 0)))
828 	    del = true;
829 	  break;
830 
831 	default:
832 	  break;
833 	}
834 
835       if (del)
836 	*pnote = XEXP (note, 1);
837       else
838 	pnote = &XEXP (note, 1);
839     }
840 }
841 
842 /* Return whether X can be decomposed into subwords.  */
843 
844 static bool
can_decompose_p(rtx x)845 can_decompose_p (rtx x)
846 {
847   if (REG_P (x))
848     {
849       unsigned int regno = REGNO (x);
850 
851       if (HARD_REGISTER_NUM_P (regno))
852 	{
853 	  unsigned int byte, num_bytes;
854 
855 	  num_bytes = GET_MODE_SIZE (GET_MODE (x));
856 	  for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
857 	    if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
858 	      return false;
859 	  return true;
860 	}
861       else
862 	return !bitmap_bit_p (subreg_context, regno);
863     }
864 
865   return true;
866 }
867 
868 /* Decompose the registers used in a simple move SET within INSN.  If
869    we don't change anything, return INSN, otherwise return the start
870    of the sequence of moves.  */
871 
872 static rtx
resolve_simple_move(rtx set,rtx insn)873 resolve_simple_move (rtx set, rtx insn)
874 {
875   rtx src, dest, real_dest, insns;
876   enum machine_mode orig_mode, dest_mode;
877   unsigned int words;
878   bool pushing;
879 
880   src = SET_SRC (set);
881   dest = SET_DEST (set);
882   orig_mode = GET_MODE (dest);
883 
884   words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
885   gcc_assert (words > 1);
886 
887   start_sequence ();
888 
889   /* We have to handle copying from a SUBREG of a decomposed reg where
890      the SUBREG is larger than word size.  Rather than assume that we
891      can take a word_mode SUBREG of the destination, we copy to a new
892      register and then copy that to the destination.  */
893 
894   real_dest = NULL_RTX;
895 
896   if (GET_CODE (src) == SUBREG
897       && resolve_reg_p (SUBREG_REG (src))
898       && (SUBREG_BYTE (src) != 0
899 	  || (GET_MODE_SIZE (orig_mode)
900 	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
901     {
902       real_dest = dest;
903       dest = gen_reg_rtx (orig_mode);
904       if (REG_P (real_dest))
905 	REG_ATTRS (dest) = REG_ATTRS (real_dest);
906     }
907 
908   /* Similarly if we are copying to a SUBREG of a decomposed reg where
909      the SUBREG is larger than word size.  */
910 
911   if (GET_CODE (dest) == SUBREG
912       && resolve_reg_p (SUBREG_REG (dest))
913       && (SUBREG_BYTE (dest) != 0
914 	  || (GET_MODE_SIZE (orig_mode)
915 	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
916     {
917       rtx reg, minsn, smove;
918 
919       reg = gen_reg_rtx (orig_mode);
920       minsn = emit_move_insn (reg, src);
921       smove = single_set (minsn);
922       gcc_assert (smove != NULL_RTX);
923       resolve_simple_move (smove, minsn);
924       src = reg;
925     }
926 
927   /* If we didn't have any big SUBREGS of decomposed registers, and
928      neither side of the move is a register we are decomposing, then
929      we don't have to do anything here.  */
930 
931   if (src == SET_SRC (set)
932       && dest == SET_DEST (set)
933       && !resolve_reg_p (src)
934       && !resolve_subreg_p (src)
935       && !resolve_reg_p (dest)
936       && !resolve_subreg_p (dest))
937     {
938       end_sequence ();
939       return insn;
940     }
941 
942   /* It's possible for the code to use a subreg of a decomposed
943      register while forming an address.  We need to handle that before
944      passing the address to emit_move_insn.  We pass NULL_RTX as the
945      insn parameter to resolve_subreg_use because we can not validate
946      the insn yet.  */
947   if (MEM_P (src) || MEM_P (dest))
948     {
949       int acg;
950 
951       if (MEM_P (src))
952 	for_each_rtx (&XEXP (src, 0), resolve_subreg_use, NULL_RTX);
953       if (MEM_P (dest))
954 	for_each_rtx (&XEXP (dest, 0), resolve_subreg_use, NULL_RTX);
955       acg = apply_change_group ();
956       gcc_assert (acg);
957     }
958 
959   /* If SRC is a register which we can't decompose, or has side
960      effects, we need to move via a temporary register.  */
961 
962   if (!can_decompose_p (src)
963       || side_effects_p (src)
964       || GET_CODE (src) == ASM_OPERANDS)
965     {
966       rtx reg;
967 
968       reg = gen_reg_rtx (orig_mode);
969       emit_move_insn (reg, src);
970       src = reg;
971     }
972 
973   /* If DEST is a register which we can't decompose, or has side
974      effects, we need to first move to a temporary register.  We
975      handle the common case of pushing an operand directly.  We also
976      go through a temporary register if it holds a floating point
977      value.  This gives us better code on systems which can't move
978      data easily between integer and floating point registers.  */
979 
980   dest_mode = orig_mode;
981   pushing = push_operand (dest, dest_mode);
982   if (!can_decompose_p (dest)
983       || (side_effects_p (dest) && !pushing)
984       || (!SCALAR_INT_MODE_P (dest_mode)
985 	  && !resolve_reg_p (dest)
986 	  && !resolve_subreg_p (dest)))
987     {
988       if (real_dest == NULL_RTX)
989 	real_dest = dest;
990       if (!SCALAR_INT_MODE_P (dest_mode))
991 	{
992 	  dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT,
993 				     MODE_INT, 0);
994 	  gcc_assert (dest_mode != BLKmode);
995 	}
996       dest = gen_reg_rtx (dest_mode);
997       if (REG_P (real_dest))
998 	REG_ATTRS (dest) = REG_ATTRS (real_dest);
999     }
1000 
1001   if (pushing)
1002     {
1003       unsigned int i, j, jinc;
1004 
1005       gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
1006       gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
1007       gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);
1008 
1009       if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
1010 	{
1011 	  j = 0;
1012 	  jinc = 1;
1013 	}
1014       else
1015 	{
1016 	  j = words - 1;
1017 	  jinc = -1;
1018 	}
1019 
1020       for (i = 0; i < words; ++i, j += jinc)
1021 	{
1022 	  rtx temp;
1023 
1024 	  temp = copy_rtx (XEXP (dest, 0));
1025 	  temp = adjust_automodify_address_nv (dest, word_mode, temp,
1026 					       j * UNITS_PER_WORD);
1027 	  emit_move_insn (temp,
1028 			  simplify_gen_subreg_concatn (word_mode, src,
1029 						       orig_mode,
1030 						       j * UNITS_PER_WORD));
1031 	}
1032     }
1033   else
1034     {
1035       unsigned int i;
1036 
1037       if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
1038 	emit_clobber (dest);
1039 
1040       for (i = 0; i < words; ++i)
1041 	emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
1042 						     dest_mode,
1043 						     i * UNITS_PER_WORD),
1044 			simplify_gen_subreg_concatn (word_mode, src,
1045 						     orig_mode,
1046 						     i * UNITS_PER_WORD));
1047     }
1048 
1049   if (real_dest != NULL_RTX)
1050     {
1051       rtx mdest, minsn, smove;
1052 
1053       if (dest_mode == orig_mode)
1054 	mdest = dest;
1055       else
1056 	mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
1057       minsn = emit_move_insn (real_dest, mdest);
1058 
1059       smove = single_set (minsn);
1060       gcc_assert (smove != NULL_RTX);
1061 
1062       resolve_simple_move (smove, minsn);
1063     }
1064 
1065   insns = get_insns ();
1066   end_sequence ();
1067 
1068   copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);
1069 
1070   emit_insn_before (insns, insn);
1071 
1072   delete_insn (insn);
1073 
1074   return insns;
1075 }
1076 
1077 /* Change a CLOBBER of a decomposed register into a CLOBBER of the
1078    component registers.  Return whether we changed something.  */
1079 
static bool
resolve_clobber (rtx pat, rtx insn)
{
  rtx reg;
  enum machine_mode orig_mode;
  unsigned int words, i;
  int ret;

  reg = XEXP (pat, 0);
  /* Only act when the clobbered rtx is a register we are decomposing,
     either directly or through a SUBREG of one.  */
  if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
    return false;

  orig_mode = GET_MODE (reg);
  /* Number of word_mode pieces, rounding a partial trailing word up.  */
  words = GET_MODE_SIZE (orig_mode);
  words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Rewrite this CLOBBER in place to clobber the first word-sized
     piece of the decomposed register.  */
  ret = validate_change (NULL_RTX, &XEXP (pat, 0),
			 simplify_gen_subreg_concatn (word_mode, reg,
						      orig_mode, 0),
			 0);
  df_insn_rescan (insn);
  gcc_assert (ret != 0);

  /* Emit an additional CLOBBER after INSN for each remaining
     word-sized piece (pieces words-1 down to 1).  */
  for (i = words - 1; i > 0; --i)
    {
      rtx x;

      x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
				       i * UNITS_PER_WORD);
      x = gen_rtx_CLOBBER (VOIDmode, x);
      emit_insn_after (x, insn);
    }

  /* Fix up any REG notes on INSN that mention decomposed registers.  */
  resolve_reg_notes (insn);

  return true;
}
1117 
1118 /* A USE of a decomposed register is no longer meaningful.  Return
1119    whether we changed something.  */
1120 
1121 static bool
resolve_use(rtx pat,rtx insn)1122 resolve_use (rtx pat, rtx insn)
1123 {
1124   if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1125     {
1126       delete_insn (insn);
1127       return true;
1128     }
1129 
1130   resolve_reg_notes (insn);
1131 
1132   return false;
1133 }
1134 
1135 /* A VAR_LOCATION can be simplified.  */
1136 
static void
resolve_debug (rtx insn)
{
  /* Walk the whole pattern of the debug insn and replace uses of
     decomposed registers with their word-sized pieces.  */
  for_each_rtx (&PATTERN (insn), adjust_decomposed_uses, NULL_RTX);

  /* The pattern may have changed; ask df to rescan the insn.  */
  df_insn_rescan (insn);

  /* Also fix up any REG notes that mention decomposed registers.  */
  resolve_reg_notes (insn);
}
1146 
1147 /* Check if INSN is a decomposable multiword-shift or zero-extend and
1148    set the decomposable_context bitmap accordingly.  SPEED_P is true
1149    if we are optimizing INSN for speed rather than size.  Return true
1150    if INSN is decomposable.  */
1151 
1152 static bool
find_decomposable_shift_zext(rtx insn,bool speed_p)1153 find_decomposable_shift_zext (rtx insn, bool speed_p)
1154 {
1155   rtx set;
1156   rtx op;
1157   rtx op_operand;
1158 
1159   set = single_set (insn);
1160   if (!set)
1161     return false;
1162 
1163   op = SET_SRC (set);
1164   if (GET_CODE (op) != ASHIFT
1165       && GET_CODE (op) != LSHIFTRT
1166       && GET_CODE (op) != ASHIFTRT
1167       && GET_CODE (op) != ZERO_EXTEND)
1168     return false;
1169 
1170   op_operand = XEXP (op, 0);
1171   if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1172       || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1173       || HARD_REGISTER_NUM_P (REGNO (op_operand))
1174       || GET_MODE (op) != twice_word_mode)
1175     return false;
1176 
1177   if (GET_CODE (op) == ZERO_EXTEND)
1178     {
1179       if (GET_MODE (op_operand) != word_mode
1180 	  || !choices[speed_p].splitting_zext)
1181 	return false;
1182     }
1183   else /* left or right shift */
1184     {
1185       bool *splitting = (GET_CODE (op) == ASHIFT
1186 			 ? choices[speed_p].splitting_ashift
1187 			 : GET_CODE (op) == ASHIFTRT
1188 			 ? choices[speed_p].splitting_ashiftrt
1189 			 : choices[speed_p].splitting_lshiftrt);
1190       if (!CONST_INT_P (XEXP (op, 1))
1191 	  || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1192 			2 * BITS_PER_WORD - 1)
1193 	  || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1194 	return false;
1195 
1196       bitmap_set_bit (decomposable_context, REGNO (op_operand));
1197     }
1198 
1199   bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1200 
1201   return true;
1202 }
1203 
1204 /* Decompose a more than word wide shift (in INSN) of a multiword
1205    pseudo or a multiword zero-extend of a wordmode pseudo into a move
1206    and 'set to zero' insn.  Return a pointer to the new insn when a
1207    replacement was done.  */
1208 
static rtx
resolve_shift_zext (rtx insn)
{
  rtx set;
  rtx op;
  rtx op_operand;
  rtx insns;
  rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
  int src_reg_num, dest_reg_num, offset1, offset2, src_offset;

  set = single_set (insn);
  if (!set)
    return NULL_RTX;

  op = SET_SRC (set);
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
      && GET_CODE (op) != ASHIFTRT
      && GET_CODE (op) != ZERO_EXTEND)
    return NULL_RTX;

  op_operand = XEXP (op, 0);

  /* We can tear this operation apart only if the regs were already
     torn apart.  */
  if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
    return NULL_RTX;

  /* src_reg_num is the number of the word mode register which we
     are operating on.  For a left shift and a zero_extend on little
     endian machines this is register 0.  */
  src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
		? 1 : 0;

  /* On big-endian targets the word numbering within a multi-word
     operand is reversed; single-word ZERO_EXTEND sources are left
     alone.  */
  if (WORDS_BIG_ENDIAN
      && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
    src_reg_num = 1 - src_reg_num;

  if (GET_CODE (op) == ZERO_EXTEND)
    dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
  else
    dest_reg_num = 1 - src_reg_num;

  /* Byte offsets of the primary result word (offset1), the other
     result word (offset2), and the source word we operate on.  */
  offset1 = UNITS_PER_WORD * dest_reg_num;
  offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
  src_offset = UNITS_PER_WORD * src_reg_num;

  start_sequence ();

  dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
                                          GET_MODE (SET_DEST (set)),
                                          offset1);
  dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
					    GET_MODE (SET_DEST (set)),
					    offset2);
  src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
                                         GET_MODE (op_operand),
                                         src_offset);
  /* For an arithmetic right shift (except by exactly
     2*BITS_PER_WORD-1), the upper result word is the source word
     arithmetically shifted right by BITS_PER_WORD-1, i.e. filled
     with copies of its sign bit.  */
  if (GET_CODE (op) == ASHIFTRT
      && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
    upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
			      BITS_PER_WORD - 1, NULL_RTX, 0);

  /* A shift count of exactly BITS_PER_WORD reduces to a plain word
     move; a larger count needs an additional word-mode shift by the
     excess over BITS_PER_WORD.  */
  if (GET_CODE (op) != ZERO_EXTEND)
    {
      int shift_count = INTVAL (XEXP (op, 1));
      if (shift_count > BITS_PER_WORD)
	src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
				LSHIFT_EXPR : RSHIFT_EXPR,
				word_mode, src_reg,
				shift_count - BITS_PER_WORD,
				dest_reg, GET_CODE (op) != ASHIFTRT);
    }

  if (dest_reg != src_reg)
    emit_move_insn (dest_reg, src_reg);
  /* Fill the other result word: zero for ASHIFT/LSHIFTRT/ZERO_EXTEND,
     a copy of the (fully shifted-out) source word or the sign-fill
     value computed above for ASHIFTRT.  */
  if (GET_CODE (op) != ASHIFTRT)
    emit_move_insn (dest_upper, CONST0_RTX (word_mode));
  else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
    emit_move_insn (dest_upper, copy_rtx (src_reg));
  else
    emit_move_insn (dest_upper, upper_src);
  insns = get_insns ();

  end_sequence ();

  emit_insn_before (insns, insn);

  if (dump_file)
    {
      rtx in;
      fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
      for (in = insns; in != insn; in = NEXT_INSN (in))
	fprintf (dump_file, "%d ", INSN_UID (in));
      fprintf (dump_file, "\n");
    }

  delete_insn (insn);
  return insns;
}
1309 
1310 /* Print to dump_file a description of what we're doing with shift code CODE.
1311    SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD.  */
1312 
1313 static void
dump_shift_choices(enum rtx_code code,bool * splitting)1314 dump_shift_choices (enum rtx_code code, bool *splitting)
1315 {
1316   int i;
1317   const char *sep;
1318 
1319   fprintf (dump_file,
1320 	   "  Splitting mode %s for %s lowering with shift amounts = ",
1321 	   GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1322   sep = "";
1323   for (i = 0; i < BITS_PER_WORD; i++)
1324     if (splitting[i])
1325       {
1326 	fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1327 	sep = ",";
1328       }
1329   fprintf (dump_file, "\n");
1330 }
1331 
1332 /* Print to dump_file a description of what we're doing when optimizing
1333    for speed or size; SPEED_P says which.  DESCRIPTION is a description
1334    of the SPEED_P choice.  */
1335 
1336 static void
dump_choices(bool speed_p,const char * description)1337 dump_choices (bool speed_p, const char *description)
1338 {
1339   unsigned int i;
1340 
1341   fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1342 
1343   for (i = 0; i < MAX_MACHINE_MODE; i++)
1344     if (GET_MODE_SIZE (i) > UNITS_PER_WORD)
1345       fprintf (dump_file, "  %s mode %s for copy lowering.\n",
1346 	       choices[speed_p].move_modes_to_split[i]
1347 	       ? "Splitting"
1348 	       : "Skipping",
1349 	       GET_MODE_NAME ((enum machine_mode) i));
1350 
1351   fprintf (dump_file, "  %s mode %s for zero_extend lowering.\n",
1352 	   choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1353 	   GET_MODE_NAME (twice_word_mode));
1354 
1355   dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1356   dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1357   dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
1358   fprintf (dump_file, "\n");
1359 }
1360 
1361 /* Look for registers which are always accessed via word-sized SUBREGs
1362    or -if DECOMPOSE_COPIES is true- via copies.  Decompose these
1363    registers into several word-sized pseudo-registers.  */
1364 
static void
decompose_multiword_subregs (bool decompose_copies)
{
  unsigned int max;
  basic_block bb;
  bool speed_p;

  if (dump_file)
    {
      dump_choices (false, "size");
      dump_choices (true, "speed");
    }

  /* Check if this target even has any modes to consider lowering.   */
  if (!choices[false].something_to_do && !choices[true].something_to_do)
    {
      if (dump_file)
	fprintf (dump_file, "Nothing to do!\n");
      return;
    }

  max = max_reg_num ();

  /* First see if there are any multi-word pseudo-registers.  If there
     aren't, there is nothing we can do.  This should speed up this
     pass in the normal case, since it should be faster than scanning
     all the insns.  */
  {
    unsigned int i;
    bool useful_modes_seen = false;

    for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
      if (regno_reg_rtx[i] != NULL)
	{
	  enum machine_mode mode = GET_MODE (regno_reg_rtx[i]);
	  if (choices[false].move_modes_to_split[(int) mode]
	      || choices[true].move_modes_to_split[(int) mode])
	    {
	      useful_modes_seen = true;
	      break;
	    }
	}

    if (!useful_modes_seen)
      {
	if (dump_file)
	  fprintf (dump_file, "Nothing to lower in this function.\n");
	return;
      }
  }

  /* Run word-level dead code elimination first so we don't decompose
     registers whose multi-word uses are actually dead.  */
  if (df)
    {
      df_set_flags (DF_DEFER_INSN_RESCAN);
      run_word_dce ();
    }

  /* FIXME: It may be possible to change this code to look for each
     multi-word pseudo-register and to find each insn which sets or
     uses that register.  That should be faster than scanning all the
     insns.  */

  decomposable_context = BITMAP_ALLOC (NULL);
  non_decomposable_context = BITMAP_ALLOC (NULL);
  subreg_context = BITMAP_ALLOC (NULL);

  reg_copy_graph.create (max);
  reg_copy_graph.safe_grow_cleared (max);
  memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);

  speed_p = optimize_function_for_speed_p (cfun);

  /* Phase 1: scan every insn, classifying it and recording in the
     decomposable/non-decomposable bitmaps which pseudos may be split.  */
  FOR_EACH_BB (bb)
    {
      rtx insn;

      FOR_BB_INSNS (bb, insn)
	{
	  rtx set;
	  enum classify_move_insn cmi;
	  int i, n;

	  if (!INSN_P (insn)
	      || GET_CODE (PATTERN (insn)) == CLOBBER
	      || GET_CODE (PATTERN (insn)) == USE)
	    continue;

	  recog_memoized (insn);

	  if (find_decomposable_shift_zext (insn, speed_p))
	    continue;

	  extract_insn (insn);

	  set = simple_move (insn, speed_p);

	  if (!set)
	    cmi = NOT_SIMPLE_MOVE;
	  else
	    {
	      /* We mark pseudo-to-pseudo copies as decomposable during the
		 second pass only.  The first pass is so early that there is
		 good chance such moves will be optimized away completely by
		 subsequent optimizations anyway.

		 However, we call find_pseudo_copy even during the first pass
		 so as to properly set up the reg_copy_graph.  */
	      if (find_pseudo_copy (set))
		cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
	      else
		cmi = SIMPLE_MOVE;
	    }

	  n = recog_data.n_operands;
	  for (i = 0; i < n; ++i)
	    {
	      for_each_rtx (&recog_data.operand[i],
			    find_decomposable_subregs,
			    &cmi);

	      /* We handle ASM_OPERANDS as a special case to support
		 things like x86 rdtsc which returns a DImode value.
		 We can decompose the output, which will certainly be
		 operand 0, but not the inputs.  */

	      if (cmi == SIMPLE_MOVE
		  && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
		{
		  gcc_assert (i == 0);
		  cmi = NOT_SIMPLE_MOVE;
		}
	    }
	}
    }

  /* A pseudo is decomposable only if no insn forced it into the
     non-decomposable set.  */
  bitmap_and_compl_into (decomposable_context, non_decomposable_context);
  if (!bitmap_empty_p (decomposable_context))
    {
      sbitmap sub_blocks;
      unsigned int i;
      sbitmap_iterator sbi;
      bitmap_iterator iter;
      unsigned int regno;

      propagate_pseudo_copies ();

      /* sub_blocks records blocks that end up with a control flow insn
	 in their middle and must be split afterwards.  */
      sub_blocks = sbitmap_alloc (last_basic_block);
      bitmap_clear (sub_blocks);

      EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
	decompose_register (regno);

      /* Phase 2: rewrite every insn that refers to a decomposed
	 register.  */
      FOR_EACH_BB (bb)
	{
	  rtx insn;

	  FOR_BB_INSNS (bb, insn)
	    {
	      rtx pat;

	      if (!INSN_P (insn))
		continue;

	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == CLOBBER)
		resolve_clobber (pat, insn);
	      else if (GET_CODE (pat) == USE)
		resolve_use (pat, insn);
	      else if (DEBUG_INSN_P (insn))
		resolve_debug (insn);
	      else
		{
		  rtx set;
		  int i;

		  recog_memoized (insn);
		  extract_insn (insn);

		  set = simple_move (insn, speed_p);
		  if (set)
		    {
		      rtx orig_insn = insn;
		      bool cfi = control_flow_insn_p (insn);

		      /* We can end up splitting loads to multi-word pseudos
			 into separate loads to machine word size pseudos.
			 When this happens, we first had one load that can
			 throw, and after resolve_simple_move we'll have a
			 bunch of loads (at least two).  All those loads may
			 trap if we can have non-call exceptions, so they
			 all will end the current basic block.  We split the
			 block after the outer loop over all insns, but we
			 make sure here that we will be able to split the
			 basic block and still produce the correct control
			 flow graph for it.  */
		      gcc_assert (!cfi
				  || (cfun->can_throw_non_call_exceptions
				      && can_throw_internal (insn)));

		      insn = resolve_simple_move (set, insn);
		      if (insn != orig_insn)
			{
			  recog_memoized (insn);
			  extract_insn (insn);

			  if (cfi)
			    bitmap_set_bit (sub_blocks, bb->index);
			}
		    }
		  else
		    {
		      rtx decomposed_shift;

		      decomposed_shift = resolve_shift_zext (insn);
		      if (decomposed_shift != NULL_RTX)
			{
			  insn = decomposed_shift;
			  recog_memoized (insn);
			  extract_insn (insn);
			}
		    }

		  /* Replace any remaining SUBREGs of decomposed
		     registers in the operands.  */
		  for (i = recog_data.n_operands - 1; i >= 0; --i)
		    for_each_rtx (recog_data.operand_loc[i],
				  resolve_subreg_use,
				  insn);

		  resolve_reg_notes (insn);

		  if (num_validated_changes () > 0)
		    {
		      /* Keep matched duplicate operands in sync with the
			 operands they duplicate before committing.  */
		      for (i = recog_data.n_dups - 1; i >= 0; --i)
			{
			  rtx *pl = recog_data.dup_loc[i];
			  int dup_num = recog_data.dup_num[i];
			  rtx *px = recog_data.operand_loc[dup_num];

			  validate_unshare_change (insn, pl, *px, 1);
			}

		      i = apply_change_group ();
		      gcc_assert (i);
		    }
		}
	    }
	}

      /* If we had insns to split that caused control flow insns in the middle
	 of a basic block, split those blocks now.  Note that we only handle
	 the case where splitting a load has caused multiple possibly trapping
	 loads to appear.  */
      EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
	{
	  rtx insn, end;
	  edge fallthru;

	  bb = BASIC_BLOCK (i);
	  insn = BB_HEAD (bb);
	  end = BB_END (bb);

	  while (insn != end)
	    {
	      if (control_flow_insn_p (insn))
		{
		  /* Split the block after insn.  There will be a fallthru
		     edge, which is OK so we keep it.  We have to create the
		     exception edges ourselves.  */
		  fallthru = split_block (bb, insn);
		  rtl_make_eh_edge (NULL, bb, BB_END (bb));
		  bb = fallthru->dest;
		  insn = BB_HEAD (bb);
		}
	      else
	        insn = NEXT_INSN (insn);
	    }
	}

      sbitmap_free (sub_blocks);
    }

  /* Release the per-pseudo copy bitmaps and the graph itself.  */
  {
    unsigned int i;
    bitmap b;

    FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
      if (b)
	BITMAP_FREE (b);
  }

  reg_copy_graph.release ();

  BITMAP_FREE (decomposable_context);
  BITMAP_FREE (non_decomposable_context);
  BITMAP_FREE (subreg_context);
}
1659 
1660 /* Gate function for lower subreg pass.  */
1661 
1662 static bool
gate_handle_lower_subreg(void)1663 gate_handle_lower_subreg (void)
1664 {
1665   return flag_split_wide_types != 0;
1666 }
1667 
1668 /* Implement first lower subreg pass.  */
1669 
static unsigned int
rest_of_handle_lower_subreg (void)
{
  /* First invocation: pseudo-to-pseudo copies are not yet treated as
     decomposable (decompose_copies == false).  */
  decompose_multiword_subregs (false);
  return 0;
}
1676 
1677 /* Implement second lower subreg pass.  */
1678 
static unsigned int
rest_of_handle_lower_subreg2 (void)
{
  /* Second invocation: also decompose pseudo-to-pseudo copies
     (decompose_copies == true).  */
  decompose_multiword_subregs (true);
  return 0;
}
1685 
/* The first lower-subreg pass ("subreg1"); its execute function calls
   decompose_multiword_subregs with decompose_copies false, so plain
   pseudo-to-pseudo copies are not yet marked decomposable.  */
struct rtl_opt_pass pass_lower_subreg =
{
 {
  RTL_PASS,
  "subreg1",	                        /* name */
  OPTGROUP_NONE,                        /* optinfo_flags */
  gate_handle_lower_subreg,             /* gate */
  rest_of_handle_lower_subreg,          /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_LOWER_SUBREG,                      /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_ggc_collect |
  TODO_verify_flow                      /* todo_flags_finish */
 }
};
1706 
1707 struct rtl_opt_pass pass_lower_subreg2 =
1708 {
1709  {
1710   RTL_PASS,
1711   "subreg2",	                        /* name */
1712   OPTGROUP_NONE,                        /* optinfo_flags */
1713   gate_handle_lower_subreg,             /* gate */
1714   rest_of_handle_lower_subreg2,          /* execute */
1715   NULL,                                 /* sub */
1716   NULL,                                 /* next */
1717   0,                                    /* static_pass_number */
1718   TV_LOWER_SUBREG,                      /* tv_id */
1719   0,                                    /* properties_required */
1720   0,                                    /* properties_provided */
1721   0,                                    /* properties_destroyed */
1722   0,                                    /* todo_flags_start */
1723   TODO_df_finish | TODO_verify_rtl_sharing |
1724   TODO_ggc_collect |
1725   TODO_verify_flow                      /* todo_flags_finish */
1726  }
1727 };
1728