1 /* Decompose multiword subregs.
2    Copyright (C) 2007-2018 Free Software Foundation, Inc.
3    Contributed by Richard Henderson <rth@redhat.com>
4 		  Ian Lance Taylor <iant@google.com>
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12 
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "cfghooks.h"
29 #include "df.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "expmed.h"
33 #include "insn-config.h"
34 #include "emit-rtl.h"
35 #include "recog.h"
36 #include "cfgrtl.h"
37 #include "cfgbuild.h"
38 #include "dce.h"
39 #include "expr.h"
40 #include "tree-pass.h"
41 #include "lower-subreg.h"
42 #include "rtl-iter.h"
43 #include "target.h"
44 
45 
46 /* Decompose multi-word pseudo-registers into individual
47    pseudo-registers when possible and profitable.  This is possible
48    when all the uses of a multi-word register are via SUBREG, or are
49    copies of the register to another location.  Breaking apart the
50    register permits more CSE and permits better register allocation.
51    This is profitable if the machine does not have move instructions
52    to do this.
53 
54    This pass only splits moves with modes that are wider than
55    word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
56    integer modes that are twice the width of word_mode.  The latter
57    could be generalized if there was a need to do this, but the trend in
58    architectures is to not need this.
59 
60    There are two useful preprocessor defines for use by maintainers:
61 
62    #define LOG_COSTS 1
63 
64    if you wish to see the actual cost estimates that are being used
65    for each mode wider than word mode and the cost estimates for zero
66    extension and the shifts.   This can be useful when port maintainers
67    are tuning insn rtx costs.
68 
69    #define FORCE_LOWERING 1
70 
71    if you wish to test the pass with all the transformation forced on.
72    This can be useful for finding bugs in the transformations.  */
73 
74 #define LOG_COSTS 0
75 #define FORCE_LOWERING 0
76 
/* Bit N in this bitmap is set if regno N is used in a context in
   which we can decompose it.  */
static bitmap decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a context in
   which it cannot be decomposed.  */
static bitmap non_decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a subreg
   which changes the mode but not the size.  This typically happens
   when the register is accessed as a floating-point value; we want to
   avoid generating accesses to its subwords in integer modes.  */
static bitmap subreg_context;

/* Bit N in the bitmap in element M of this array is set if there is a
   copy from reg M to reg N.  */
static vec<bitmap> reg_copy_graph;

struct target_lower_subreg default_target_lower_subreg;
#if SWITCHABLE_TARGET
struct target_lower_subreg *this_target_lower_subreg
  = &default_target_lower_subreg;
#endif

/* Convenience accessors for the target-specific lowering state.  */
#define twice_word_mode \
  this_target_lower_subreg->x_twice_word_mode
#define choices \
  this_target_lower_subreg->x_choices
105 
106 /* Return true if MODE is a mode we know how to lower.  When returning true,
107    store its byte size in *BYTES and its word size in *WORDS.  */
108 
109 static inline bool
110 interesting_mode_p (machine_mode mode, unsigned int *bytes,
111 		    unsigned int *words)
112 {
113   if (!GET_MODE_SIZE (mode).is_constant (bytes))
114     return false;
115   *words = CEIL (*bytes, UNITS_PER_WORD);
116   return true;
117 }
118 
/* RTXes used while computing costs.  These are built once (see
   init_lower_subreg) and then mutated in place by the costing
   routines, which overwrite their modes, codes and operands.  */
struct cost_rtxes {
  /* Source and target registers.  */
  rtx source;
  rtx target;

  /* A twice_word_mode ZERO_EXTEND of SOURCE.  */
  rtx zext;

  /* A shift of SOURCE.  */
  rtx shift;

  /* A SET of TARGET.  */
  rtx set;
};
134 
135 /* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
136    rtxes in RTXES.  SPEED_P selects between the speed and size cost.  */
137 
138 static int
139 shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
140 	    machine_mode mode, int op1)
141 {
142   PUT_CODE (rtxes->shift, code);
143   PUT_MODE (rtxes->shift, mode);
144   PUT_MODE (rtxes->source, mode);
145   XEXP (rtxes->shift, 1) = gen_int_shift_amount (mode, op1);
146   return set_src_cost (rtxes->shift, mode, speed_p);
147 }
148 
/* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
   to true if it is profitable to split a double-word CODE shift
   of X + BITS_PER_WORD bits.  SPEED_P says whether we are testing
   for speed or size profitability.

   Use the rtxes in RTXES to calculate costs.  WORD_MOVE_ZERO_COST is
   the cost of moving zero into a word-mode register.  WORD_MOVE_COST
   is the cost of moving between word registers.  */

static void
compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
			 bool *splitting, enum rtx_code code,
			 int word_move_zero_cost, int word_move_cost)
{
  int wide_cost, narrow_cost, upper_cost, i;

  for (i = 0; i < BITS_PER_WORD; i++)
    {
      /* Cost of performing the full shift in twice_word_mode.  */
      wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
			      i + BITS_PER_WORD);
      /* Cost of producing the data-carrying result word: a shift by
	 exactly BITS_PER_WORD (i == 0) degenerates into a plain
	 word-to-word move.  */
      if (i == 0)
	narrow_cost = word_move_cost;
      else
	narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);

      /* Cost of producing the other result word: a zero for logical
	 shifts; for ASHIFTRT either a copy of the sign word (when only
	 the sign word remains) or a word_mode arithmetic shift by
	 BITS_PER_WORD - 1 that replicates the sign bit.  */
      if (code != ASHIFTRT)
	upper_cost = word_move_zero_cost;
      else if (i == BITS_PER_WORD - 1)
	upper_cost = word_move_cost;
      else
	upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
				 BITS_PER_WORD - 1);

      if (LOG_COSTS)
	fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
		 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);

      /* Split when the split form is no more expensive than the wide
	 shift (ties favor splitting).  */
      if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
	splitting[i] = true;
    }
}
191 
/* Compute what we should do when optimizing for speed or size; SPEED_P
   selects which.  Use RTXES for computing costs.  The results are
   recorded in choices[SPEED_P].  */

static void
compute_costs (bool speed_p, struct cost_rtxes *rtxes)
{
  unsigned int i;
  int word_move_zero_cost, word_move_cost;

  /* Baseline: cost of setting a word-mode register to zero...  */
  PUT_MODE (rtxes->target, word_mode);
  SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
  word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);

  /* ... and of a word-mode register-to-register move.  */
  SET_SRC (rtxes->set) = rtxes->source;
  word_move_cost = set_rtx_cost (rtxes->set, speed_p);

  if (LOG_COSTS)
    fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
	     GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);

  /* For every multi-word mode, decide whether a move in that mode is
     better done as FACTOR separate word-mode moves.  */
  for (i = 0; i < MAX_MACHINE_MODE; i++)
    {
      machine_mode mode = (machine_mode) i;
      unsigned int size, factor;
      if (interesting_mode_p (mode, &size, &factor) && factor > 1)
	{
	  unsigned int mode_move_cost;

	  PUT_MODE (rtxes->target, mode);
	  PUT_MODE (rtxes->source, mode);
	  mode_move_cost = set_rtx_cost (rtxes->set, speed_p);

	  if (LOG_COSTS)
	    fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
		     GET_MODE_NAME (mode), mode_move_cost,
		     word_move_cost, factor);

	  if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
	    {
	      choices[speed_p].move_modes_to_split[i] = true;
	      choices[speed_p].something_to_do = true;
	    }
	}
    }

  /* For the moves and shifts, the only case that is checked is one
     where the mode of the target is an integer mode twice the width
     of the word_mode.

     If it is not profitable to split a double word move then do not
     even consider the shifts or the zero extension.  */
  if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
    {
      int zext_cost;

      /* The only case here to check to see if moving the upper part with a
	 zero is cheaper than doing the zext itself.  */
      PUT_MODE (rtxes->source, word_mode);
      zext_cost = set_src_cost (rtxes->zext, twice_word_mode, speed_p);

      if (LOG_COSTS)
	fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
		 zext_cost, word_move_cost, word_move_zero_cost);

      if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
	choices[speed_p].splitting_zext = true;

      /* Work out which double-word shift amounts are worth splitting,
	 separately for each shift code.  */
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashift, ASHIFT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_lshiftrt, LSHIFTRT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashiftrt, ASHIFTRT,
			       word_move_zero_cost, word_move_cost);
    }
}
271 
/* Do one-per-target initialisation.  This involves determining
   which operations on the machine are profitable.  If none are found,
   then the pass just returns when called.  */

void
init_lower_subreg (void)
{
  struct cost_rtxes rtxes;

  memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));

  twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode).require ();

  /* Build the scratch rtxes used for cost estimates.  The register
     numbers only need to be distinct from each other and from the
     virtual registers.  */
  rtxes.target = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
  rtxes.source = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 2);
  rtxes.set = gen_rtx_SET (rtxes.target, rtxes.source);
  rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
  rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);

  /* Compute the choices twice: once for size and once for speed.  */
  if (LOG_COSTS)
    fprintf (stderr, "\nSize costs\n==========\n\n");
  compute_costs (false, &rtxes);

  if (LOG_COSTS)
    fprintf (stderr, "\nSpeed costs\n===========\n\n");
  compute_costs (true, &rtxes);
}
299 
300 static bool
301 simple_move_operand (rtx x)
302 {
303   if (GET_CODE (x) == SUBREG)
304     x = SUBREG_REG (x);
305 
306   if (!OBJECT_P (x))
307     return false;
308 
309   if (GET_CODE (x) == LABEL_REF
310       || GET_CODE (x) == SYMBOL_REF
311       || GET_CODE (x) == HIGH
312       || GET_CODE (x) == CONST)
313     return false;
314 
315   if (MEM_P (x)
316       && (MEM_VOLATILE_P (x)
317 	  || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
318     return false;
319 
320   return true;
321 }
322 
/* If INSN is a single set between two objects that we want to split,
   return the single set.  SPEED_P says whether we are optimizing
   INSN for speed or size.

   INSN should have been passed to recog and extract_insn before this
   is called, so that recog_data describes its operands.  */

static rtx
simple_move (rtx_insn *insn, bool speed_p)
{
  rtx x;
  rtx set;
  machine_mode mode;

  /* A simple move has exactly two operands.  */
  if (recog_data.n_operands != 2)
    return NULL_RTX;

  set = single_set (insn);
  if (!set)
    return NULL_RTX;

  /* The destination must be one of the two recog operands and an
     object we know how to split.  */
  x = SET_DEST (set);
  if (x != recog_data.operand[0] && x != recog_data.operand[1])
    return NULL_RTX;
  if (!simple_move_operand (x))
    return NULL_RTX;

  /* Likewise the source.  */
  x = SET_SRC (set);
  if (x != recog_data.operand[0] && x != recog_data.operand[1])
    return NULL_RTX;
  /* For the src we can handle ASM_OPERANDS, and it is beneficial for
     things like x86 rdtsc which returns a DImode value.  */
  if (GET_CODE (x) != ASM_OPERANDS
      && !simple_move_operand (x))
    return NULL_RTX;

  /* We try to decompose in integer modes, to avoid generating
     inefficient code copying between integer and floating point
     registers.  That means that we can't decompose if this is a
     non-integer mode for which there is no integer mode of the same
     size.  */
  mode = GET_MODE (SET_DEST (set));
  if (!SCALAR_INT_MODE_P (mode)
      && !int_mode_for_size (GET_MODE_BITSIZE (mode), 0).exists ())
    return NULL_RTX;

  /* Reject PARTIAL_INT modes.  They are used for processor specific
     purposes and it's probably best not to tamper with them.  */
  if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return NULL_RTX;

  /* Finally, the move's mode must be one that compute_costs decided
     was profitable to split.  */
  if (!choices[speed_p].move_modes_to_split[(int) mode])
    return NULL_RTX;

  return set;
}
379 
380 /* If SET is a copy from one multi-word pseudo-register to another,
381    record that in reg_copy_graph.  Return whether it is such a
382    copy.  */
383 
384 static bool
385 find_pseudo_copy (rtx set)
386 {
387   rtx dest = SET_DEST (set);
388   rtx src = SET_SRC (set);
389   unsigned int rd, rs;
390   bitmap b;
391 
392   if (!REG_P (dest) || !REG_P (src))
393     return false;
394 
395   rd = REGNO (dest);
396   rs = REGNO (src);
397   if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
398     return false;
399 
400   b = reg_copy_graph[rs];
401   if (b == NULL)
402     {
403       b = BITMAP_ALLOC (NULL);
404       reg_copy_graph[rs] = b;
405     }
406 
407   bitmap_set_bit (b, rd);
408 
409   return true;
410 }
411 
/* Look through the registers in DECOMPOSABLE_CONTEXT.  For each case
   where they are copied to another register, add the register to
   which they are copied to DECOMPOSABLE_CONTEXT.  Use
   NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
   copies of registers which are in NON_DECOMPOSABLE_CONTEXT.  */

static void
propagate_pseudo_copies (void)
{
  auto_bitmap queue, propagate;

  /* Standard worklist fixpoint: QUEUE holds the registers whose copy
     targets have not been processed yet.  */
  bitmap_copy (queue, decomposable_context);
  do
    {
      bitmap_iterator iter;
      unsigned int i;

      bitmap_clear (propagate);

      /* Collect every copy target of a queued register, excluding
	 those already known to be non-decomposable.  */
      EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
	{
	  bitmap b = reg_copy_graph[i];
	  if (b)
	    bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
	}

      /* Requeue only the registers that are newly decomposable, then
	 merge all of them into the result.  */
      bitmap_and_compl (queue, propagate, decomposable_context);
      bitmap_ior_into (decomposable_context, propagate);
    }
  while (!bitmap_empty_p (queue));
}
443 
/* A pointer to one of these values is passed to
   find_decomposable_subregs to describe how the insn being scanned
   uses its registers.  */

enum classify_move_insn
{
  /* Not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* A simple move we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* Any other simple move.  */
  SIMPLE_MOVE
};
456 
/* If we find a SUBREG in *LOC which we could use to decompose a
   pseudo-register, set a bit in DECOMPOSABLE_CONTEXT.  If we find an
   unadorned register which is not a simple pseudo-register copy,
   *PCMI gives the type of move, and we set a bit in
   DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate.  */

static void
find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
    {
      rtx x = *iter;
      if (GET_CODE (x) == SUBREG)
	{
	  rtx inner = SUBREG_REG (x);
	  unsigned int regno, outer_size, inner_size, outer_words, inner_words;

	  if (!REG_P (inner))
	    continue;

	  regno = REGNO (inner);
	  if (HARD_REGISTER_NUM_P (regno))
	    {
	      /* Hard registers are not our concern; don't look inside.  */
	      iter.skip_subrtxes ();
	      continue;
	    }

	  if (!interesting_mode_p (GET_MODE (x), &outer_size, &outer_words)
	      || !interesting_mode_p (GET_MODE (inner), &inner_size,
				      &inner_words))
	    continue;

	  /* We only try to decompose single word subregs of multi-word
	     registers.  When we find one, we skip the subrtxes to avoid
	     iterating over the inner register.

	     ??? This doesn't allow, e.g., DImode subregs of TImode values
	     on 32-bit targets.  We would need to record the way the
	     pseudo-register was used, and only decompose if all the uses
	     were the same number and size of pieces.  Hopefully this
	     doesn't happen much.  */

	  if (outer_words == 1 && inner_words > 1)
	    {
	      bitmap_set_bit (decomposable_context, regno);
	      iter.skip_subrtxes ();
	      continue;
	    }

	  /* If this is a cast from one mode to another, where the modes
	     have the same size, and they are not tieable, then mark this
	     register as non-decomposable.  If we decompose it we are
	     likely to mess up whatever the backend is trying to do.  */
	  if (outer_words > 1
	      && outer_size == inner_size
	      && !targetm.modes_tieable_p (GET_MODE (x), GET_MODE (inner)))
	    {
	      bitmap_set_bit (non_decomposable_context, regno);
	      bitmap_set_bit (subreg_context, regno);
	      iter.skip_subrtxes ();
	      continue;
	    }
	}
      else if (REG_P (x))
	{
	  unsigned int regno, size, words;

	  /* We will see an outer SUBREG before we see the inner REG, so
	     when we see a plain REG here it means a direct reference to
	     the register.

	     If this is not a simple copy from one location to another,
	     then we can not decompose this register.  If this is a simple
	     copy we want to decompose, and the mode is right,
	     then we mark the register as decomposable.
	     Otherwise we don't say anything about this register --
	     it could be decomposed, but whether that would be
	     profitable depends upon how it is used elsewhere.

	     We only set bits in the bitmap for multi-word
	     pseudo-registers, since those are the only ones we care about
	     and it keeps the size of the bitmaps down.  */

	  regno = REGNO (x);
	  if (!HARD_REGISTER_NUM_P (regno)
	      && interesting_mode_p (GET_MODE (x), &size, &words)
	      && words > 1)
	    {
	      switch (*pcmi)
		{
		case NOT_SIMPLE_MOVE:
		  bitmap_set_bit (non_decomposable_context, regno);
		  break;
		case DECOMPOSABLE_SIMPLE_MOVE:
		  if (targetm.modes_tieable_p (GET_MODE (x), word_mode))
		    bitmap_set_bit (decomposable_context, regno);
		  break;
		case SIMPLE_MOVE:
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	}
      else if (MEM_P (x))
	{
	  enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;

	  /* Any registers used in a MEM do not participate in a
	     SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE.  Do our own recursion
	     here, and skip the subrtxes to block the parent's recursion.  */
	  find_decomposable_subregs (&XEXP (x, 0), &cmi_mem);
	  iter.skip_subrtxes ();
	}
    }
}
574 
575 /* Decompose REGNO into word-sized components.  We smash the REG node
576    in place.  This ensures that (1) something goes wrong quickly if we
577    fail to make some replacement, and (2) the debug information inside
578    the symbol table is automatically kept up to date.  */
579 
580 static void
581 decompose_register (unsigned int regno)
582 {
583   rtx reg;
584   unsigned int size, words, i;
585   rtvec v;
586 
587   reg = regno_reg_rtx[regno];
588 
589   regno_reg_rtx[regno] = NULL_RTX;
590 
591   if (!interesting_mode_p (GET_MODE (reg), &size, &words))
592     gcc_unreachable ();
593 
594   v = rtvec_alloc (words);
595   for (i = 0; i < words; ++i)
596     RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
597 
598   PUT_CODE (reg, CONCATN);
599   XVEC (reg, 0) = v;
600 
601   if (dump_file)
602     {
603       fprintf (dump_file, "; Splitting reg %u ->", regno);
604       for (i = 0; i < words; ++i)
605 	fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
606       fputc ('\n', dump_file);
607     }
608 }
609 
/* Get a SUBREG of a CONCATN.  OP is the CONCATN, OUTERMODE the mode
   of the requested subreg and ORIG_BYTE its byte offset within OP.
   Return NULL_RTX when the requested piece does not line up with a
   single element of the CONCATN.  */

static rtx
simplify_subreg_concatn (machine_mode outermode, rtx op, poly_uint64 orig_byte)
{
  unsigned int outer_size, outer_words, inner_size, inner_words;
  machine_mode innermode, partmode;
  rtx part;
  unsigned int final_offset;
  unsigned int byte;

  innermode = GET_MODE (op);
  if (!interesting_mode_p (outermode, &outer_size, &outer_words)
      || !interesting_mode_p (innermode, &inner_size, &inner_words))
    gcc_unreachable ();

  /* Must be constant if interesting_mode_p passes.  */
  byte = orig_byte.to_constant ();
  gcc_assert (GET_CODE (op) == CONCATN);
  gcc_assert (byte % outer_size == 0);

  gcc_assert (byte < inner_size);
  if (outer_size > inner_size)
    return NULL_RTX;

  /* From here on INNER_SIZE is the size of one CONCATN element.  */
  inner_size /= XVECLEN (op, 0);
  part = XVECEXP (op, 0, byte / inner_size);
  partmode = GET_MODE (part);

  /* Offset of the subreg within the selected element; fail if the
     subreg would straddle two elements.  */
  final_offset = byte % inner_size;
  if (final_offset + outer_size > inner_size)
    return NULL_RTX;

  /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
     regular CONST_VECTORs.  They have vector or integer modes, depending
     on the capabilities of the target.  Cope with them.  */
  if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
    partmode = GET_MODE_INNER (innermode);
  else if (partmode == VOIDmode)
    partmode = mode_for_size (inner_size * BITS_PER_UNIT,
			      GET_MODE_CLASS (innermode), 0).require ();

  return simplify_gen_subreg (outermode, part, partmode, final_offset);
}
654 
/* Wrapper around simplify_gen_subreg which handles CONCATN.  */

static rtx
simplify_gen_subreg_concatn (machine_mode outermode, rtx op,
			     machine_mode innermode, unsigned int byte)
{
  rtx ret;

  /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
     If OP is a SUBREG of a CONCATN, then it must be a simple mode
     change with the same size and offset 0, or it must extract a
     part.  We shouldn't see anything else here.  */
  if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
    {
      rtx op2;

      /* A same-size, offset-0 SUBREG is a pure mode change: recurse
	 directly on the CONCATN underneath it.  */
      if (known_eq (GET_MODE_SIZE (GET_MODE (op)),
		    GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
	  && known_eq (SUBREG_BYTE (op), 0))
	return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
					    GET_MODE (SUBREG_REG (op)), byte);

      /* Otherwise try to extract the part that OP itself denotes.  */
      op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
				     SUBREG_BYTE (op));
      if (op2 == NULL_RTX)
	{
	  /* We don't handle paradoxical subregs here.  */
	  gcc_assert (!paradoxical_subreg_p (outermode, GET_MODE (op)));
	  gcc_assert (!paradoxical_subreg_p (op));
	  /* Fold the two byte offsets and extract in a single step.  */
	  op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
					 byte + SUBREG_BYTE (op));
	  gcc_assert (op2 != NULL_RTX);
	  return op2;
	}

      op = op2;
      gcc_assert (op != NULL_RTX);
      gcc_assert (innermode == GET_MODE (op));
    }

  if (GET_CODE (op) == CONCATN)
    return simplify_subreg_concatn (outermode, op, byte);

  ret = simplify_gen_subreg (outermode, op, innermode, byte);

  /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
     resolve_simple_move will ask for the high part of the paradoxical
     subreg, which does not have a value.  Just return a zero.  */
  if (ret == NULL_RTX
      && paradoxical_subreg_p (op))
    return CONST0_RTX (outermode);

  gcc_assert (ret != NULL_RTX);
  return ret;
}
710 
/* Return whether we should resolve X into the registers into which it
   was decomposed.  decompose_register smashes a decomposed REG into a
   CONCATN in place, so testing for that code is sufficient.  */

static bool
resolve_reg_p (rtx x)
{
  return GET_CODE (x) == CONCATN;
}
719 
720 /* Return whether X is a SUBREG of a register which we need to
721    resolve.  */
722 
723 static bool
724 resolve_subreg_p (rtx x)
725 {
726   if (GET_CODE (x) != SUBREG)
727     return false;
728   return resolve_reg_p (SUBREG_REG (x));
729 }
730 
731 /* Look for SUBREGs in *LOC which need to be decomposed.  */
732 
733 static bool
734 resolve_subreg_use (rtx *loc, rtx insn)
735 {
736   subrtx_ptr_iterator::array_type array;
737   FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
738     {
739       rtx *loc = *iter;
740       rtx x = *loc;
741       if (resolve_subreg_p (x))
742 	{
743 	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
744 				       SUBREG_BYTE (x));
745 
746 	  /* It is possible for a note to contain a reference which we can
747 	     decompose.  In this case, return 1 to the caller to indicate
748 	     that the note must be removed.  */
749 	  if (!x)
750 	    {
751 	      gcc_assert (!insn);
752 	      return true;
753 	    }
754 
755 	  validate_change (insn, loc, x, 1);
756 	  iter.skip_subrtxes ();
757 	}
758       else if (resolve_reg_p (x))
759 	/* Return 1 to the caller to indicate that we found a direct
760 	   reference to a register which is being decomposed.  This can
761 	   happen inside notes, multiword shift or zero-extend
762 	   instructions.  */
763 	return true;
764     }
765 
766   return false;
767 }
768 
/* Resolve any decomposed registers which appear in register notes on
   INSN.  */

static void
resolve_reg_notes (rtx_insn *insn)
{
  rtx *pnote, note;

  /* A REG_EQUAL/REG_EQUIV note either has its SUBREGs resolved, or is
     removed outright when it contains an unresolvable reference.  */
  note = find_reg_equal_equiv_note (insn);
  if (note)
    {
      int old_count = num_validated_changes ();
      if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX))
	remove_note (insn, note);
      else
	if (old_count != num_validated_changes ())
	  df_notes_rescan (insn);
    }

  /* Delete REG_DEAD and REG_UNUSED notes that refer to a register
     that has been smashed into a CONCATN.  */
  pnote = &REG_NOTES (insn);
  while (*pnote != NULL_RTX)
    {
      bool del = false;

      note = *pnote;
      switch (REG_NOTE_KIND (note))
	{
	case REG_DEAD:
	case REG_UNUSED:
	  if (resolve_reg_p (XEXP (note, 0)))
	    del = true;
	  break;

	default:
	  break;
	}

      /* Either unlink the note or step over it.  */
      if (del)
	*pnote = XEXP (note, 1);
      else
	pnote = &XEXP (note, 1);
    }
}
812 
813 /* Return whether X can be decomposed into subwords.  */
814 
815 static bool
816 can_decompose_p (rtx x)
817 {
818   if (REG_P (x))
819     {
820       unsigned int regno = REGNO (x);
821 
822       if (HARD_REGISTER_NUM_P (regno))
823 	{
824 	  unsigned int byte, num_bytes, num_words;
825 
826 	  if (!interesting_mode_p (GET_MODE (x), &num_bytes, &num_words))
827 	    return false;
828 	  for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
829 	    if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
830 	      return false;
831 	  return true;
832 	}
833       else
834 	return !bitmap_bit_p (subreg_context, regno);
835     }
836 
837   return true;
838 }
839 
840 /* Decompose the registers used in a simple move SET within INSN.  If
841    we don't change anything, return INSN, otherwise return the start
842    of the sequence of moves.  */
843 
844 static rtx_insn *
845 resolve_simple_move (rtx set, rtx_insn *insn)
846 {
847   rtx src, dest, real_dest;
848   rtx_insn *insns;
849   machine_mode orig_mode, dest_mode;
850   unsigned int orig_size, words;
851   bool pushing;
852 
853   src = SET_SRC (set);
854   dest = SET_DEST (set);
855   orig_mode = GET_MODE (dest);
856 
857   if (!interesting_mode_p (orig_mode, &orig_size, &words))
858     gcc_unreachable ();
859   gcc_assert (words > 1);
860 
861   start_sequence ();
862 
863   /* We have to handle copying from a SUBREG of a decomposed reg where
864      the SUBREG is larger than word size.  Rather than assume that we
865      can take a word_mode SUBREG of the destination, we copy to a new
866      register and then copy that to the destination.  */
867 
868   real_dest = NULL_RTX;
869 
870   if (GET_CODE (src) == SUBREG
871       && resolve_reg_p (SUBREG_REG (src))
872       && (maybe_ne (SUBREG_BYTE (src), 0)
873 	  || maybe_ne (orig_size, GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
874     {
875       real_dest = dest;
876       dest = gen_reg_rtx (orig_mode);
877       if (REG_P (real_dest))
878 	REG_ATTRS (dest) = REG_ATTRS (real_dest);
879     }
880 
881   /* Similarly if we are copying to a SUBREG of a decomposed reg where
882      the SUBREG is larger than word size.  */
883 
884   if (GET_CODE (dest) == SUBREG
885       && resolve_reg_p (SUBREG_REG (dest))
886       && (maybe_ne (SUBREG_BYTE (dest), 0)
887 	  || maybe_ne (orig_size,
888 		       GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
889     {
890       rtx reg, smove;
891       rtx_insn *minsn;
892 
893       reg = gen_reg_rtx (orig_mode);
894       minsn = emit_move_insn (reg, src);
895       smove = single_set (minsn);
896       gcc_assert (smove != NULL_RTX);
897       resolve_simple_move (smove, minsn);
898       src = reg;
899     }
900 
901   /* If we didn't have any big SUBREGS of decomposed registers, and
902      neither side of the move is a register we are decomposing, then
903      we don't have to do anything here.  */
904 
905   if (src == SET_SRC (set)
906       && dest == SET_DEST (set)
907       && !resolve_reg_p (src)
908       && !resolve_subreg_p (src)
909       && !resolve_reg_p (dest)
910       && !resolve_subreg_p (dest))
911     {
912       end_sequence ();
913       return insn;
914     }
915 
916   /* It's possible for the code to use a subreg of a decomposed
917      register while forming an address.  We need to handle that before
918      passing the address to emit_move_insn.  We pass NULL_RTX as the
919      insn parameter to resolve_subreg_use because we can not validate
920      the insn yet.  */
921   if (MEM_P (src) || MEM_P (dest))
922     {
923       int acg;
924 
925       if (MEM_P (src))
926 	resolve_subreg_use (&XEXP (src, 0), NULL_RTX);
927       if (MEM_P (dest))
928 	resolve_subreg_use (&XEXP (dest, 0), NULL_RTX);
929       acg = apply_change_group ();
930       gcc_assert (acg);
931     }
932 
933   /* If SRC is a register which we can't decompose, or has side
934      effects, we need to move via a temporary register.  */
935 
936   if (!can_decompose_p (src)
937       || side_effects_p (src)
938       || GET_CODE (src) == ASM_OPERANDS)
939     {
940       rtx reg;
941 
942       reg = gen_reg_rtx (orig_mode);
943 
944       if (AUTO_INC_DEC)
945 	{
946 	  rtx_insn *move = emit_move_insn (reg, src);
947 	  if (MEM_P (src))
948 	    {
949 	      rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
950 	      if (note)
951 		add_reg_note (move, REG_INC, XEXP (note, 0));
952 	    }
953 	}
954       else
955 	emit_move_insn (reg, src);
956 
957       src = reg;
958     }
959 
960   /* If DEST is a register which we can't decompose, or has side
961      effects, we need to first move to a temporary register.  We
962      handle the common case of pushing an operand directly.  We also
963      go through a temporary register if it holds a floating point
964      value.  This gives us better code on systems which can't move
965      data easily between integer and floating point registers.  */
966 
967   dest_mode = orig_mode;
968   pushing = push_operand (dest, dest_mode);
969   if (!can_decompose_p (dest)
970       || (side_effects_p (dest) && !pushing)
971       || (!SCALAR_INT_MODE_P (dest_mode)
972 	  && !resolve_reg_p (dest)
973 	  && !resolve_subreg_p (dest)))
974     {
975       if (real_dest == NULL_RTX)
976 	real_dest = dest;
977       if (!SCALAR_INT_MODE_P (dest_mode))
978 	dest_mode = int_mode_for_mode (dest_mode).require ();
979       dest = gen_reg_rtx (dest_mode);
980       if (REG_P (real_dest))
981 	REG_ATTRS (dest) = REG_ATTRS (real_dest);
982     }
983 
984   if (pushing)
985     {
986       unsigned int i, j, jinc;
987 
988       gcc_assert (orig_size % UNITS_PER_WORD == 0);
989       gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
990       gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);
991 
992       if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
993 	{
994 	  j = 0;
995 	  jinc = 1;
996 	}
997       else
998 	{
999 	  j = words - 1;
1000 	  jinc = -1;
1001 	}
1002 
1003       for (i = 0; i < words; ++i, j += jinc)
1004 	{
1005 	  rtx temp;
1006 
1007 	  temp = copy_rtx (XEXP (dest, 0));
1008 	  temp = adjust_automodify_address_nv (dest, word_mode, temp,
1009 					       j * UNITS_PER_WORD);
1010 	  emit_move_insn (temp,
1011 			  simplify_gen_subreg_concatn (word_mode, src,
1012 						       orig_mode,
1013 						       j * UNITS_PER_WORD));
1014 	}
1015     }
1016   else
1017     {
1018       unsigned int i;
1019 
1020       if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
1021 	emit_clobber (dest);
1022 
1023       for (i = 0; i < words; ++i)
1024 	emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
1025 						     dest_mode,
1026 						     i * UNITS_PER_WORD),
1027 			simplify_gen_subreg_concatn (word_mode, src,
1028 						     orig_mode,
1029 						     i * UNITS_PER_WORD));
1030     }
1031 
1032   if (real_dest != NULL_RTX)
1033     {
1034       rtx mdest, smove;
1035       rtx_insn *minsn;
1036 
1037       if (dest_mode == orig_mode)
1038 	mdest = dest;
1039       else
1040 	mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
1041       minsn = emit_move_insn (real_dest, mdest);
1042 
1043   if (AUTO_INC_DEC && MEM_P (real_dest)
1044       && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
1045     {
1046       rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
1047       if (note)
1048 	add_reg_note (minsn, REG_INC, XEXP (note, 0));
1049     }
1050 
1051       smove = single_set (minsn);
1052       gcc_assert (smove != NULL_RTX);
1053 
1054       resolve_simple_move (smove, minsn);
1055     }
1056 
1057   insns = get_insns ();
1058   end_sequence ();
1059 
1060   copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);
1061 
1062   emit_insn_before (insns, insn);
1063 
1064   /* If we get here via self-recursion, then INSN is not yet in the insns
1065      chain and delete_insn will fail.  We only want to remove INSN from the
1066      current sequence.  See PR56738.  */
1067   if (in_sequence_p ())
1068     remove_insn (insn);
1069   else
1070     delete_insn (insn);
1071 
1072   return insns;
1073 }
1074 
1075 /* Change a CLOBBER of a decomposed register into a CLOBBER of the
1076    component registers.  Return whether we changed something.  */
1077 
static bool
resolve_clobber (rtx pat, rtx_insn *insn)
{
  rtx reg;
  machine_mode orig_mode;
  unsigned int orig_size, words, i;
  int ret;

  reg = XEXP (pat, 0);
  /* Nothing to do unless the clobbered register (or a subreg of one)
     was decomposed by this pass.  */
  if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
    return false;

  orig_mode = GET_MODE (reg);
  /* A decomposed register always has a multi-word "interesting"
     mode, so this lookup cannot fail.  */
  if (!interesting_mode_p (orig_mode, &orig_size, &words))
    gcc_unreachable ();

  /* Rewrite this CLOBBER in place so that it clobbers word 0 of the
     concatenation of component registers.  */
  ret = validate_change (NULL_RTX, &XEXP (pat, 0),
			 simplify_gen_subreg_concatn (word_mode, reg,
						      orig_mode, 0),
			 0);
  df_insn_rescan (insn);
  gcc_assert (ret != 0);

  /* Emit separate CLOBBERs after INSN for each of the remaining
     component words.  */
  for (i = words - 1; i > 0; --i)
    {
      rtx x;

      x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
				       i * UNITS_PER_WORD);
      x = gen_rtx_CLOBBER (VOIDmode, x);
      emit_insn_after (x, insn);
    }

  resolve_reg_notes (insn);

  return true;
}
1115 
1116 /* A USE of a decomposed register is no longer meaningful.  Return
1117    whether we changed something.  */
1118 
1119 static bool
1120 resolve_use (rtx pat, rtx_insn *insn)
1121 {
1122   if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1123     {
1124       delete_insn (insn);
1125       return true;
1126     }
1127 
1128   resolve_reg_notes (insn);
1129 
1130   return false;
1131 }
1132 
1133 /* A VAR_LOCATION can be simplified.  */
1134 
static void
resolve_debug (rtx_insn *insn)
{
  subrtx_ptr_iterator::array_type array;
  /* Walk every sub-rtx of the debug insn's pattern, rewriting
     references to decomposed registers in place.  */
  FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
    {
      rtx *loc = *iter;
      rtx x = *loc;
      if (resolve_subreg_p (x))
	{
	  /* Try to express the subreg of the decomposed register
	     directly in terms of its component words.  */
	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				       SUBREG_BYTE (x));

	  if (x)
	    *loc = x;
	  else
	    x = copy_rtx (*loc);
	}
      /* Unshare a direct reference to a decomposed register so that
	 the debug insn does not share structure with other insns.  */
      if (resolve_reg_p (x))
	*loc = copy_rtx (x);
    }

  df_insn_rescan (insn);

  resolve_reg_notes (insn);
}
1161 
1162 /* Check if INSN is a decomposable multiword-shift or zero-extend and
1163    set the decomposable_context bitmap accordingly.  SPEED_P is true
1164    if we are optimizing INSN for speed rather than size.  Return true
1165    if INSN is decomposable.  */
1166 
1167 static bool
1168 find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
1169 {
1170   rtx set;
1171   rtx op;
1172   rtx op_operand;
1173 
1174   set = single_set (insn);
1175   if (!set)
1176     return false;
1177 
1178   op = SET_SRC (set);
1179   if (GET_CODE (op) != ASHIFT
1180       && GET_CODE (op) != LSHIFTRT
1181       && GET_CODE (op) != ASHIFTRT
1182       && GET_CODE (op) != ZERO_EXTEND)
1183     return false;
1184 
1185   op_operand = XEXP (op, 0);
1186   if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1187       || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1188       || HARD_REGISTER_NUM_P (REGNO (op_operand))
1189       || GET_MODE (op) != twice_word_mode)
1190     return false;
1191 
1192   if (GET_CODE (op) == ZERO_EXTEND)
1193     {
1194       if (GET_MODE (op_operand) != word_mode
1195 	  || !choices[speed_p].splitting_zext)
1196 	return false;
1197     }
1198   else /* left or right shift */
1199     {
1200       bool *splitting = (GET_CODE (op) == ASHIFT
1201 			 ? choices[speed_p].splitting_ashift
1202 			 : GET_CODE (op) == ASHIFTRT
1203 			 ? choices[speed_p].splitting_ashiftrt
1204 			 : choices[speed_p].splitting_lshiftrt);
1205       if (!CONST_INT_P (XEXP (op, 1))
1206 	  || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1207 			2 * BITS_PER_WORD - 1)
1208 	  || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1209 	return false;
1210 
1211       bitmap_set_bit (decomposable_context, REGNO (op_operand));
1212     }
1213 
1214   bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1215 
1216   return true;
1217 }
1218 
1219 /* Decompose a more than word wide shift (in INSN) of a multiword
1220    pseudo or a multiword zero-extend of a wordmode pseudo into a move
1221    and 'set to zero' insn.  Return a pointer to the new insn when a
1222    replacement was done.  */
1223 
static rtx_insn *
resolve_shift_zext (rtx_insn *insn)
{
  rtx set;
  rtx op;
  rtx op_operand;
  rtx_insn *insns;
  rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
  int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
  scalar_int_mode inner_mode;

  set = single_set (insn);
  if (!set)
    return NULL;

  /* Only shifts and zero-extends are handled here.  */
  op = SET_SRC (set);
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
      && GET_CODE (op) != ASHIFTRT
      && GET_CODE (op) != ZERO_EXTEND)
    return NULL;

  op_operand = XEXP (op, 0);
  if (!is_a <scalar_int_mode> (GET_MODE (op_operand), &inner_mode))
    return NULL;

  /* We can tear this operation apart only if the regs were already
     torn apart.  */
  if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
    return NULL;

  /* src_reg_num is the number of the word mode register which we
     are operating on.  For a left shift and a zero_extend on little
     endian machines this is register 0.  */
  src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
		? 1 : 0;

  if (WORDS_BIG_ENDIAN && GET_MODE_SIZE (inner_mode) > UNITS_PER_WORD)
    src_reg_num = 1 - src_reg_num;

  if (GET_CODE (op) == ZERO_EXTEND)
    dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
  else
    dest_reg_num = 1 - src_reg_num;

  /* Byte offsets of the destination word that receives the data, the
     other destination word, and the source word we operate on.  */
  offset1 = UNITS_PER_WORD * dest_reg_num;
  offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
  src_offset = UNITS_PER_WORD * src_reg_num;

  start_sequence ();

  dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
                                          GET_MODE (SET_DEST (set)),
                                          offset1);
  dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
					    GET_MODE (SET_DEST (set)),
					    offset2);
  src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
                                         GET_MODE (op_operand),
                                         src_offset);
  /* For an arithmetic right shift the upper destination word is
     filled with copies of the sign bit, computed by shifting the
     source word right by BITS_PER_WORD - 1 -- except when the shift
     amount is 2 * BITS_PER_WORD - 1, which is special-cased below.  */
  if (GET_CODE (op) == ASHIFTRT
      && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
    upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
			      BITS_PER_WORD - 1, NULL_RTX, 0);

  if (GET_CODE (op) != ZERO_EXTEND)
    {
      int shift_count = INTVAL (XEXP (op, 1));
      /* A shift by more than one word becomes a word_mode shift of
	 the selected source word by the remaining amount.  */
      if (shift_count > BITS_PER_WORD)
	src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
				LSHIFT_EXPR : RSHIFT_EXPR,
				word_mode, src_reg,
				shift_count - BITS_PER_WORD,
				dest_reg, GET_CODE (op) != ASHIFTRT);
    }

  if (dest_reg != src_reg)
    emit_move_insn (dest_reg, src_reg);
  /* Fill the other destination word: zero for logical shifts and
     zero_extend, sign-bit copies for arithmetic right shifts.  */
  if (GET_CODE (op) != ASHIFTRT)
    emit_move_insn (dest_upper, CONST0_RTX (word_mode));
  else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
    emit_move_insn (dest_upper, copy_rtx (src_reg));
  else
    emit_move_insn (dest_upper, upper_src);
  insns = get_insns ();

  end_sequence ();

  emit_insn_before (insns, insn);

  if (dump_file)
    {
      rtx_insn *in;
      fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
      for (in = insns; in != insn; in = NEXT_INSN (in))
	fprintf (dump_file, "%d ", INSN_UID (in));
      fprintf (dump_file, "\n");
    }

  delete_insn (insn);
  return insns;
}
1326 
1327 /* Print to dump_file a description of what we're doing with shift code CODE.
1328    SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD.  */
1329 
1330 static void
1331 dump_shift_choices (enum rtx_code code, bool *splitting)
1332 {
1333   int i;
1334   const char *sep;
1335 
1336   fprintf (dump_file,
1337 	   "  Splitting mode %s for %s lowering with shift amounts = ",
1338 	   GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1339   sep = "";
1340   for (i = 0; i < BITS_PER_WORD; i++)
1341     if (splitting[i])
1342       {
1343 	fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1344 	sep = ",";
1345       }
1346   fprintf (dump_file, "\n");
1347 }
1348 
1349 /* Print to dump_file a description of what we're doing when optimizing
1350    for speed or size; SPEED_P says which.  DESCRIPTION is a description
1351    of the SPEED_P choice.  */
1352 
1353 static void
1354 dump_choices (bool speed_p, const char *description)
1355 {
1356   unsigned int size, factor, i;
1357 
1358   fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1359 
1360   for (i = 0; i < MAX_MACHINE_MODE; i++)
1361     if (interesting_mode_p ((machine_mode) i, &size, &factor)
1362 	&& factor > 1)
1363       fprintf (dump_file, "  %s mode %s for copy lowering.\n",
1364 	       choices[speed_p].move_modes_to_split[i]
1365 	       ? "Splitting"
1366 	       : "Skipping",
1367 	       GET_MODE_NAME ((machine_mode) i));
1368 
1369   fprintf (dump_file, "  %s mode %s for zero_extend lowering.\n",
1370 	   choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1371 	   GET_MODE_NAME (twice_word_mode));
1372 
1373   dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1374   dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1375   dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
1376   fprintf (dump_file, "\n");
1377 }
1378 
1379 /* Look for registers which are always accessed via word-sized SUBREGs
1380    or -if DECOMPOSE_COPIES is true- via copies.  Decompose these
1381    registers into several word-sized pseudo-registers.  */
1382 
static void
decompose_multiword_subregs (bool decompose_copies)
{
  unsigned int max;
  basic_block bb;
  bool speed_p;

  if (dump_file)
    {
      dump_choices (false, "size");
      dump_choices (true, "speed");
    }

  /* Check if this target even has any modes to consider lowering.   */
  if (!choices[false].something_to_do && !choices[true].something_to_do)
    {
      if (dump_file)
	fprintf (dump_file, "Nothing to do!\n");
      return;
    }

  max = max_reg_num ();

  /* First see if there are any multi-word pseudo-registers.  If there
     aren't, there is nothing we can do.  This should speed up this
     pass in the normal case, since it should be faster than scanning
     all the insns.  */
  {
    unsigned int i;
    bool useful_modes_seen = false;

    for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
      if (regno_reg_rtx[i] != NULL)
	{
	  machine_mode mode = GET_MODE (regno_reg_rtx[i]);
	  if (choices[false].move_modes_to_split[(int) mode]
	      || choices[true].move_modes_to_split[(int) mode])
	    {
	      useful_modes_seen = true;
	      break;
	    }
	}

    if (!useful_modes_seen)
      {
	if (dump_file)
	  fprintf (dump_file, "Nothing to lower in this function.\n");
	return;
      }
  }

  /* If dataflow is available, defer insn rescans and run word-level
     DCE first so that dead word-sized pieces do not make registers
     look used.  */
  if (df)
    {
      df_set_flags (DF_DEFER_INSN_RESCAN);
      run_word_dce ();
    }

  /* FIXME: It may be possible to change this code to look for each
     multi-word pseudo-register and to find each insn which sets or
     uses that register.  That should be faster than scanning all the
     insns.  */

  decomposable_context = BITMAP_ALLOC (NULL);
  non_decomposable_context = BITMAP_ALLOC (NULL);
  subreg_context = BITMAP_ALLOC (NULL);

  reg_copy_graph.create (max);
  reg_copy_graph.safe_grow_cleared (max);
  memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);

  /* First scan: classify each insn, build the pseudo-copy graph, and
     record in the context bitmaps which registers may or may not be
     decomposed.  */
  speed_p = optimize_function_for_speed_p (cfun);
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      FOR_BB_INSNS (bb, insn)
	{
	  rtx set;
	  enum classify_move_insn cmi;
	  int i, n;

	  if (!INSN_P (insn)
	      || GET_CODE (PATTERN (insn)) == CLOBBER
	      || GET_CODE (PATTERN (insn)) == USE)
	    continue;

	  recog_memoized (insn);

	  if (find_decomposable_shift_zext (insn, speed_p))
	    continue;

	  extract_insn (insn);

	  set = simple_move (insn, speed_p);

	  if (!set)
	    cmi = NOT_SIMPLE_MOVE;
	  else
	    {
	      /* We mark pseudo-to-pseudo copies as decomposable during the
		 second pass only.  The first pass is so early that there is
		 good chance such moves will be optimized away completely by
		 subsequent optimizations anyway.

		 However, we call find_pseudo_copy even during the first pass
		 so as to properly set up the reg_copy_graph.  */
	      if (find_pseudo_copy (set))
		cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
	      else
		cmi = SIMPLE_MOVE;
	    }

	  n = recog_data.n_operands;
	  for (i = 0; i < n; ++i)
	    {
	      find_decomposable_subregs (&recog_data.operand[i], &cmi);

	      /* We handle ASM_OPERANDS as a special case to support
		 things like x86 rdtsc which returns a DImode value.
		 We can decompose the output, which will certainly be
		 operand 0, but not the inputs.  */

	      if (cmi == SIMPLE_MOVE
		  && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
		{
		  gcc_assert (i == 0);
		  cmi = NOT_SIMPLE_MOVE;
		}
	    }
	}
    }

  /* A register seen in any non-decomposable context can never be
     split, so remove those from the candidate set.  */
  bitmap_and_compl_into (decomposable_context, non_decomposable_context);
  if (!bitmap_empty_p (decomposable_context))
    {
      unsigned int i;
      sbitmap_iterator sbi;
      bitmap_iterator iter;
      unsigned int regno;

      propagate_pseudo_copies ();

      auto_sbitmap sub_blocks (last_basic_block_for_fn (cfun));
      bitmap_clear (sub_blocks);

      EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
	decompose_register (regno);

      /* Second scan: rewrite every insn that refers to a decomposed
	 register.  */
      FOR_EACH_BB_FN (bb, cfun)
	{
	  rtx_insn *insn;

	  FOR_BB_INSNS (bb, insn)
	    {
	      rtx pat;

	      if (!INSN_P (insn))
		continue;

	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == CLOBBER)
		resolve_clobber (pat, insn);
	      else if (GET_CODE (pat) == USE)
		resolve_use (pat, insn);
	      else if (DEBUG_INSN_P (insn))
		resolve_debug (insn);
	      else
		{
		  rtx set;
		  int i;

		  recog_memoized (insn);
		  extract_insn (insn);

		  set = simple_move (insn, speed_p);
		  if (set)
		    {
		      rtx_insn *orig_insn = insn;
		      bool cfi = control_flow_insn_p (insn);

		      /* We can end up splitting loads to multi-word pseudos
			 into separate loads to machine word size pseudos.
			 When this happens, we first had one load that can
			 throw, and after resolve_simple_move we'll have a
			 bunch of loads (at least two).  All those loads may
			 trap if we can have non-call exceptions, so they
			 all will end the current basic block.  We split the
			 block after the outer loop over all insns, but we
			 make sure here that we will be able to split the
			 basic block and still produce the correct control
			 flow graph for it.  */
		      gcc_assert (!cfi
				  || (cfun->can_throw_non_call_exceptions
				      && can_throw_internal (insn)));

		      insn = resolve_simple_move (set, insn);
		      if (insn != orig_insn)
			{
			  recog_memoized (insn);
			  extract_insn (insn);

			  if (cfi)
			    bitmap_set_bit (sub_blocks, bb->index);
			}
		    }
		  else
		    {
		      rtx_insn *decomposed_shift;

		      decomposed_shift = resolve_shift_zext (insn);
		      if (decomposed_shift != NULL_RTX)
			{
			  insn = decomposed_shift;
			  recog_memoized (insn);
			  extract_insn (insn);
			}
		    }

		  for (i = recog_data.n_operands - 1; i >= 0; --i)
		    resolve_subreg_use (recog_data.operand_loc[i], insn);

		  resolve_reg_notes (insn);

		  /* Keep matched (dup) operands in sync with the
		     operands we just rewrote.  */
		  if (num_validated_changes () > 0)
		    {
		      for (i = recog_data.n_dups - 1; i >= 0; --i)
			{
			  rtx *pl = recog_data.dup_loc[i];
			  int dup_num = recog_data.dup_num[i];
			  rtx *px = recog_data.operand_loc[dup_num];

			  validate_unshare_change (insn, pl, *px, 1);
			}

		      i = apply_change_group ();
		      gcc_assert (i);
		    }
		}
	    }
	}

      /* If we had insns to split that caused control flow insns in the middle
	 of a basic block, split those blocks now.  Note that we only handle
	 the case where splitting a load has caused multiple possibly trapping
	 loads to appear.  */
      EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
	{
	  rtx_insn *insn, *end;
	  edge fallthru;

	  bb = BASIC_BLOCK_FOR_FN (cfun, i);
	  insn = BB_HEAD (bb);
	  end = BB_END (bb);

	  while (insn != end)
	    {
	      if (control_flow_insn_p (insn))
		{
		  /* Split the block after insn.  There will be a fallthru
		     edge, which is OK so we keep it.  We have to create the
		     exception edges ourselves.  */
		  fallthru = split_block (bb, insn);
		  rtl_make_eh_edge (NULL, bb, BB_END (bb));
		  bb = fallthru->dest;
		  insn = BB_HEAD (bb);
		}
	      else
	        insn = NEXT_INSN (insn);
	    }
	}
    }

  /* Release the per-register copy bitmaps and the context bitmaps.  */
  {
    unsigned int i;
    bitmap b;

    FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
      if (b)
	BITMAP_FREE (b);
  }

  reg_copy_graph.release ();

  BITMAP_FREE (decomposable_context);
  BITMAP_FREE (non_decomposable_context);
  BITMAP_FREE (subreg_context);
}
1670 
1671 /* Implement first lower subreg pass.  */
1672 
namespace {

/* Pass descriptor for the first lower-subreg pass ("subreg1"), which
   runs early and does not treat pseudo-to-pseudo copies as
   decomposable.  */

const pass_data pass_data_lower_subreg =
{
  RTL_PASS, /* type */
  "subreg1", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_lower_subreg : public rtl_opt_pass
{
public:
  pass_lower_subreg (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when -fsplit-wide-types is enabled.  */
  virtual bool gate (function *) { return flag_split_wide_types != 0; }
  /* Pass FALSE: do not decompose plain pseudo-to-pseudo copies in
     this early pass.  */
  virtual unsigned int execute (function *)
    {
      decompose_multiword_subregs (false);
      return 0;
    }

}; // class pass_lower_subreg

} // anon namespace
1706 
1707 rtl_opt_pass *
1708 make_pass_lower_subreg (gcc::context *ctxt)
1709 {
1710   return new pass_lower_subreg (ctxt);
1711 }
1712 
1713 /* Implement second lower subreg pass.  */
1714 
namespace {

/* Pass descriptor for the second lower-subreg pass ("subreg2"), which
   runs later and also decomposes pseudo-to-pseudo copies.  */

const pass_data pass_data_lower_subreg2 =
{
  RTL_PASS, /* type */
  "subreg2", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_lower_subreg2 : public rtl_opt_pass
{
public:
  pass_lower_subreg2 (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when -fsplit-wide-types is enabled.  */
  virtual bool gate (function *) { return flag_split_wide_types != 0; }
  /* Pass TRUE: in this later pass pseudo-to-pseudo copies are also
     decomposed.  */
  virtual unsigned int execute (function *)
    {
      decompose_multiword_subregs (true);
      return 0;
    }

}; // class pass_lower_subreg2

} // anon namespace
1748 
1749 rtl_opt_pass *
1750 make_pass_lower_subreg2 (gcc::context *ctxt)
1751 {
1752   return new pass_lower_subreg2 (ctxt);
1753 }
1754