1 /* Decompose multiword subregs.
2    Copyright (C) 2007-2018 Free Software Foundation, Inc.
3    Contributed by Richard Henderson <rth@redhat.com>
4 		  Ian Lance Taylor <iant@google.com>
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12 
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "cfghooks.h"
29 #include "df.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "expmed.h"
33 #include "insn-config.h"
34 #include "emit-rtl.h"
35 #include "recog.h"
36 #include "cfgrtl.h"
37 #include "cfgbuild.h"
38 #include "dce.h"
39 #include "expr.h"
40 #include "tree-pass.h"
41 #include "lower-subreg.h"
42 #include "rtl-iter.h"
43 #include "target.h"
44 
45 
46 /* Decompose multi-word pseudo-registers into individual
47    pseudo-registers when possible and profitable.  This is possible
48    when all the uses of a multi-word register are via SUBREG, or are
49    copies of the register to another location.  Breaking apart the
50    register permits more CSE and permits better register allocation.
51    This is profitable if the machine does not have move instructions
52    to do this.
53 
54    This pass only splits moves with modes that are wider than
55    word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
56    integer modes that are twice the width of word_mode.  The latter
57    could be generalized if there was a need to do this, but the trend in
58    architectures is to not need this.
59 
60    There are two useful preprocessor defines for use by maintainers:
61 
62    #define LOG_COSTS 1
63 
64    if you wish to see the actual cost estimates that are being used
65    for each mode wider than word mode and the cost estimates for zero
66    extension and the shifts.   This can be useful when port maintainers
67    are tuning insn rtx costs.
68 
69    #define FORCE_LOWERING 1
70 
71    if you wish to test the pass with all the transformation forced on.
72    This can be useful for finding bugs in the transformations.  */
73 
74 #define LOG_COSTS 0
75 #define FORCE_LOWERING 0
76 
77 /* Bit N in this bitmap is set if regno N is used in a context in
78    which we can decompose it.  */
79 static bitmap decomposable_context;
80 
/* Bit N in this bitmap is set if regno N is used in a context in
   which it cannot be decomposed.  */
static bitmap non_decomposable_context;
84 
/* Bit N in this bitmap is set if regno N is used in a subreg
   which changes the mode but not the size.  This typically happens
   when the register is accessed as a floating-point value; we want to
   avoid generating accesses to its subwords in integer modes.  */
static bitmap subreg_context;
90 
91 /* Bit N in the bitmap in element M of this array is set if there is a
92    copy from reg M to reg N.  */
93 static vec<bitmap> reg_copy_graph;
94 
95 struct target_lower_subreg default_target_lower_subreg;
96 #if SWITCHABLE_TARGET
97 struct target_lower_subreg *this_target_lower_subreg
98   = &default_target_lower_subreg;
99 #endif
100 
101 #define twice_word_mode \
102   this_target_lower_subreg->x_twice_word_mode
103 #define choices \
104   this_target_lower_subreg->x_choices
105 
106 /* Return true if MODE is a mode we know how to lower.  When returning true,
107    store its byte size in *BYTES and its word size in *WORDS.  */
108 
109 static inline bool
110 interesting_mode_p (machine_mode mode, unsigned int *bytes,
111 		    unsigned int *words)
112 {
113   if (!GET_MODE_SIZE (mode).is_constant (bytes))
114     return false;
115   *words = CEIL (*bytes, UNITS_PER_WORD);
116   return true;
117 }
118 
/* RTXes used while computing costs.  These are built once (see
   init_lower_subreg) and then re-labelled in place via PUT_MODE /
   PUT_CODE by the costing routines, so that rtx cost queries can be
   made without allocating fresh RTL for every query.  */
struct cost_rtxes {
  /* Source and target registers.  */
  rtx source;
  rtx target;

  /* A twice_word_mode ZERO_EXTEND of SOURCE.  */
  rtx zext;

  /* A shift of SOURCE.  */
  rtx shift;

  /* A SET of TARGET.  */
  rtx set;
};
134 
135 /* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
136    rtxes in RTXES.  SPEED_P selects between the speed and size cost.  */
137 
138 static int
139 shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
140 	    machine_mode mode, int op1)
141 {
142   PUT_CODE (rtxes->shift, code);
143   PUT_MODE (rtxes->shift, mode);
144   PUT_MODE (rtxes->source, mode);
145   XEXP (rtxes->shift, 1) = gen_int_shift_amount (mode, op1);
146   return set_src_cost (rtxes->shift, mode, speed_p);
147 }
148 
149 /* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
150    to true if it is profitable to split a double-word CODE shift
151    of X + BITS_PER_WORD bits.  SPEED_P says whether we are testing
152    for speed or size profitability.
153 
154    Use the rtxes in RTXES to calculate costs.  WORD_MOVE_ZERO_COST is
155    the cost of moving zero into a word-mode register.  WORD_MOVE_COST
156    is the cost of moving between word registers.  */
157 
158 static void
159 compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
160 			 bool *splitting, enum rtx_code code,
161 			 int word_move_zero_cost, int word_move_cost)
162 {
163   int wide_cost, narrow_cost, upper_cost, i;
164 
165   for (i = 0; i < BITS_PER_WORD; i++)
166     {
167       wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
168 			      i + BITS_PER_WORD);
169       if (i == 0)
170 	narrow_cost = word_move_cost;
171       else
172 	narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
173 
174       if (code != ASHIFTRT)
175 	upper_cost = word_move_zero_cost;
176       else if (i == BITS_PER_WORD - 1)
177 	upper_cost = word_move_cost;
178       else
179 	upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
180 				 BITS_PER_WORD - 1);
181 
182       if (LOG_COSTS)
183 	fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
184 		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
185 		 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);
186 
187       if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
188 	splitting[i] = true;
189     }
190 }
191 
/* Compute what we should do when optimizing for speed or size; SPEED_P
   selects which.  Use RTXES for computing costs.  The decisions are
   recorded in the global CHOICES array.  */

static void
compute_costs (bool speed_p, struct cost_rtxes *rtxes)
{
  unsigned int i;
  int word_move_zero_cost, word_move_cost;

  /* Cost of setting a word-mode register to zero.  */
  PUT_MODE (rtxes->target, word_mode);
  SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
  word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);

  /* Cost of a word-mode register-to-register move.  */
  SET_SRC (rtxes->set) = rtxes->source;
  word_move_cost = set_rtx_cost (rtxes->set, speed_p);

  if (LOG_COSTS)
    fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
	     GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);

  /* For every lowerable multi-word mode, decide whether a move in that
     mode is better done as FACTOR separate word-mode moves.  */
  for (i = 0; i < MAX_MACHINE_MODE; i++)
    {
      machine_mode mode = (machine_mode) i;
      unsigned int size, factor;
      if (interesting_mode_p (mode, &size, &factor) && factor > 1)
	{
	  unsigned int mode_move_cost;

	  PUT_MODE (rtxes->target, mode);
	  PUT_MODE (rtxes->source, mode);
	  mode_move_cost = set_rtx_cost (rtxes->set, speed_p);

	  if (LOG_COSTS)
	    fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
		     GET_MODE_NAME (mode), mode_move_cost,
		     word_move_cost, factor);

	  if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
	    {
	      choices[speed_p].move_modes_to_split[i] = true;
	      choices[speed_p].something_to_do = true;
	    }
	}
    }

  /* For the moves and shifts, the only case that is checked is one
     where the mode of the target is an integer mode twice the width
     of the word_mode.

     If it is not profitable to split a double word move then do not
     even consider the shifts or the zero extension.  */
  if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
    {
      int zext_cost;

      /* The only case here is to check whether moving the upper part
	 with a zero is cheaper than doing the zext itself.  */
      PUT_MODE (rtxes->source, word_mode);
      zext_cost = set_src_cost (rtxes->zext, twice_word_mode, speed_p);

      if (LOG_COSTS)
	fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
		 zext_cost, word_move_cost, word_move_zero_cost);

      if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
	choices[speed_p].splitting_zext = true;

      /* Decide, per shift amount, whether each kind of double-word
	 shift is better split into word-mode operations.  */
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashift, ASHIFT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_lshiftrt, LSHIFTRT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashiftrt, ASHIFTRT,
			       word_move_zero_cost, word_move_cost);
    }
}
271 
/* Do one-per-target initialisation.  This involves determining
   which operations on the machine are profitable.  If none are found,
   then the pass just returns when called.  */

void
init_lower_subreg (void)
{
  struct cost_rtxes rtxes;

  memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));

  twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode).require ();

  /* Build scratch RTL once; the costing routines re-label these rtxes
     with PUT_MODE/PUT_CODE instead of allocating new ones.  The regnos
     are scratch numbers just past the virtual registers.  */
  rtxes.target = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
  rtxes.source = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 2);
  rtxes.set = gen_rtx_SET (rtxes.target, rtxes.source);
  rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
  rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);

  /* Compute the cost tables for both optimization goals.  */
  if (LOG_COSTS)
    fprintf (stderr, "\nSize costs\n==========\n\n");
  compute_costs (false, &rtxes);

  if (LOG_COSTS)
    fprintf (stderr, "\nSpeed costs\n===========\n\n");
  compute_costs (true, &rtxes);
}
299 
300 static bool
301 simple_move_operand (rtx x)
302 {
303   if (GET_CODE (x) == SUBREG)
304     x = SUBREG_REG (x);
305 
306   if (!OBJECT_P (x))
307     return false;
308 
309   if (GET_CODE (x) == LABEL_REF
310       || GET_CODE (x) == SYMBOL_REF
311       || GET_CODE (x) == HIGH
312       || GET_CODE (x) == CONST)
313     return false;
314 
315   if (MEM_P (x)
316       && (MEM_VOLATILE_P (x)
317 	  || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
318     return false;
319 
320   return true;
321 }
322 
/* If INSN is a single set between two objects that we want to split,
   return the single set.  SPEED_P says whether we are optimizing
   INSN for speed or size.

   INSN should have been passed to recog and extract_insn before this
   is called.  */

static rtx
simple_move (rtx_insn *insn, bool speed_p)
{
  rtx x;
  rtx set;
  machine_mode mode;

  /* A simple move has exactly two recognized operands.  */
  if (recog_data.n_operands != 2)
    return NULL_RTX;

  set = single_set (insn);
  if (!set)
    return NULL_RTX;

  /* The destination must be one of the recognized operands and an
     object we can take word-sized pieces of.  */
  x = SET_DEST (set);
  if (x != recog_data.operand[0] && x != recog_data.operand[1])
    return NULL_RTX;
  if (!simple_move_operand (x))
    return NULL_RTX;

  x = SET_SRC (set);
  if (x != recog_data.operand[0] && x != recog_data.operand[1])
    return NULL_RTX;
  /* For the src we can handle ASM_OPERANDS, and it is beneficial for
     things like x86 rdtsc which returns a DImode value.  */
  if (GET_CODE (x) != ASM_OPERANDS
      && !simple_move_operand (x))
    return NULL_RTX;

  /* We try to decompose in integer modes, to avoid generating
     inefficient code copying between integer and floating point
     registers.  That means that we can't decompose if this is a
     non-integer mode for which there is no integer mode of the same
     size.  */
  mode = GET_MODE (SET_DEST (set));
  if (!SCALAR_INT_MODE_P (mode)
      && !int_mode_for_size (GET_MODE_BITSIZE (mode), 0).exists ())
    return NULL_RTX;

  /* Reject PARTIAL_INT modes.  They are used for processor specific
     purposes and it's probably best not to tamper with them.  */
  if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return NULL_RTX;

  /* Only act on modes that the cost analysis decided were profitable
     to split (see compute_costs).  */
  if (!choices[speed_p].move_modes_to_split[(int) mode])
    return NULL_RTX;

  return set;
}
379 
380 /* If SET is a copy from one multi-word pseudo-register to another,
381    record that in reg_copy_graph.  Return whether it is such a
382    copy.  */
383 
384 static bool
385 find_pseudo_copy (rtx set)
386 {
387   rtx dest = SET_DEST (set);
388   rtx src = SET_SRC (set);
389   unsigned int rd, rs;
390   bitmap b;
391 
392   if (!REG_P (dest) || !REG_P (src))
393     return false;
394 
395   rd = REGNO (dest);
396   rs = REGNO (src);
397   if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
398     return false;
399 
400   b = reg_copy_graph[rs];
401   if (b == NULL)
402     {
403       b = BITMAP_ALLOC (NULL);
404       reg_copy_graph[rs] = b;
405     }
406 
407   bitmap_set_bit (b, rd);
408 
409   return true;
410 }
411 
/* Look through the registers in DECOMPOSABLE_CONTEXT.  For each case
   where they are copied to another register, add the register to
   which they are copied to DECOMPOSABLE_CONTEXT.  Use
   NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
   copies of registers which are in NON_DECOMPOSABLE_CONTEXT.  */

static void
propagate_pseudo_copies (void)
{
  auto_bitmap queue, propagate;

  /* Worklist iteration to a fixed point: each pass pushes the
     decomposable property one copy edge further along
     reg_copy_graph.  */
  bitmap_copy (queue, decomposable_context);
  do
    {
      bitmap_iterator iter;
      unsigned int i;

      bitmap_clear (propagate);

      /* Gather every copy destination of a register in QUEUE, except
	 those already known to be non-decomposable.  */
      EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
	{
	  bitmap b = reg_copy_graph[i];
	  if (b)
	    bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
	}

      /* Requeue only the registers not seen before, then fold the new
	 ones into the result set.  */
      bitmap_and_compl (queue, propagate, decomposable_context);
      bitmap_ior_into (decomposable_context, propagate);
    }
  while (!bitmap_empty_p (queue));
}
443 
/* A pointer to one of these values is passed to
   find_decomposable_subregs; it classifies the insn whose operands
   are being scanned.  */

enum classify_move_insn
{
  /* Not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* A simple move we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* Any other simple move.  */
  SIMPLE_MOVE
};
456 
/* If we find a SUBREG in *LOC which we could use to decompose a
   pseudo-register, set a bit in DECOMPOSABLE_CONTEXT.  If we find an
   unadorned register which is not a simple pseudo-register copy,
   *PCMI gives the type of move, and we set a bit in
   DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate.  */

static void
find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
    {
      rtx x = *iter;
      if (GET_CODE (x) == SUBREG)
	{
	  rtx inner = SUBREG_REG (x);
	  unsigned int regno, outer_size, inner_size, outer_words, inner_words;

	  if (!REG_P (inner))
	    continue;

	  /* Hard registers are never decomposed; don't look inside.  */
	  regno = REGNO (inner);
	  if (HARD_REGISTER_NUM_P (regno))
	    {
	      iter.skip_subrtxes ();
	      continue;
	    }

	  if (!interesting_mode_p (GET_MODE (x), &outer_size, &outer_words)
	      || !interesting_mode_p (GET_MODE (inner), &inner_size,
				      &inner_words))
	    continue;

	  /* We only try to decompose single word subregs of multi-word
	     registers.  When we find one, we skip the subrtxes to avoid
	     iterating over the inner register.

	     ??? This doesn't allow, e.g., DImode subregs of TImode values
	     on 32-bit targets.  We would need to record the way the
	     pseudo-register was used, and only decompose if all the uses
	     were the same number and size of pieces.  Hopefully this
	     doesn't happen much.  */

	  if (outer_words == 1
	      && inner_words > 1
	      /* Don't allow to decompose floating point subregs of
		 multi-word pseudos if the floating point mode does
		 not have word size, because otherwise we'd generate
		 a subreg with that floating mode from a different
		 sized integral pseudo which is not allowed by
		 validate_subreg.  */
	      && (!FLOAT_MODE_P (GET_MODE (x))
		  || outer_size == UNITS_PER_WORD))
	    {
	      bitmap_set_bit (decomposable_context, regno);
	      iter.skip_subrtxes ();
	      continue;
	    }

	  /* If this is a cast from one mode to another, where the modes
	     have the same size, and they are not tieable, then mark this
	     register as non-decomposable.  If we decompose it we are
	     likely to mess up whatever the backend is trying to do.  */
	  if (outer_words > 1
	      && outer_size == inner_size
	      && !targetm.modes_tieable_p (GET_MODE (x), GET_MODE (inner)))
	    {
	      bitmap_set_bit (non_decomposable_context, regno);
	      bitmap_set_bit (subreg_context, regno);
	      iter.skip_subrtxes ();
	      continue;
	    }
	}
      else if (REG_P (x))
	{
	  unsigned int regno, size, words;

	  /* We will see an outer SUBREG before we see the inner REG, so
	     when we see a plain REG here it means a direct reference to
	     the register.

	     If this is not a simple copy from one location to another,
	     then we can not decompose this register.  If this is a simple
	     copy we want to decompose, and the mode is right,
	     then we mark the register as decomposable.
	     Otherwise we don't say anything about this register --
	     it could be decomposed, but whether that would be
	     profitable depends upon how it is used elsewhere.

	     We only set bits in the bitmap for multi-word
	     pseudo-registers, since those are the only ones we care about
	     and it keeps the size of the bitmaps down.  */

	  regno = REGNO (x);
	  if (!HARD_REGISTER_NUM_P (regno)
	      && interesting_mode_p (GET_MODE (x), &size, &words)
	      && words > 1)
	    {
	      switch (*pcmi)
		{
		case NOT_SIMPLE_MOVE:
		  bitmap_set_bit (non_decomposable_context, regno);
		  break;
		case DECOMPOSABLE_SIMPLE_MOVE:
		  if (targetm.modes_tieable_p (GET_MODE (x), word_mode))
		    bitmap_set_bit (decomposable_context, regno);
		  break;
		case SIMPLE_MOVE:
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	}
      else if (MEM_P (x))
	{
	  enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;

	  /* Any registers used in a MEM do not participate in a
	     SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE.  Do our own recursion
	     here, and skip the subrtxes to block the parent iteration
	     from also walking the address.  */
	  find_decomposable_subregs (&XEXP (x, 0), &cmi_mem);
	  iter.skip_subrtxes ();
	}
    }
}
583 
584 /* Decompose REGNO into word-sized components.  We smash the REG node
585    in place.  This ensures that (1) something goes wrong quickly if we
586    fail to make some replacement, and (2) the debug information inside
587    the symbol table is automatically kept up to date.  */
588 
589 static void
590 decompose_register (unsigned int regno)
591 {
592   rtx reg;
593   unsigned int size, words, i;
594   rtvec v;
595 
596   reg = regno_reg_rtx[regno];
597 
598   regno_reg_rtx[regno] = NULL_RTX;
599 
600   if (!interesting_mode_p (GET_MODE (reg), &size, &words))
601     gcc_unreachable ();
602 
603   v = rtvec_alloc (words);
604   for (i = 0; i < words; ++i)
605     RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
606 
607   PUT_CODE (reg, CONCATN);
608   XVEC (reg, 0) = v;
609 
610   if (dump_file)
611     {
612       fprintf (dump_file, "; Splitting reg %u ->", regno);
613       for (i = 0; i < words; ++i)
614 	fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
615       fputc ('\n', dump_file);
616     }
617 }
618 
/* Get a SUBREG of a CONCATN.  OUTERMODE is the mode of the subreg,
   OP is the CONCATN and ORIG_BYTE is the byte offset of the subreg
   within OP.  Return NULL_RTX if the requested piece does not fit
   within a single element of the CONCATN.  */

static rtx
simplify_subreg_concatn (machine_mode outermode, rtx op, poly_uint64 orig_byte)
{
  unsigned int outer_size, outer_words, inner_size, inner_words;
  machine_mode innermode, partmode;
  rtx part;
  unsigned int final_offset;
  unsigned int byte;

  innermode = GET_MODE (op);
  if (!interesting_mode_p (outermode, &outer_size, &outer_words)
      || !interesting_mode_p (innermode, &inner_size, &inner_words))
    gcc_unreachable ();

  /* Must be constant if interesting_mode_p passes.  */
  byte = orig_byte.to_constant ();
  gcc_assert (GET_CODE (op) == CONCATN);
  gcc_assert (byte % outer_size == 0);

  gcc_assert (byte < inner_size);
  if (outer_size > inner_size)
    return NULL_RTX;

  /* From here on INNER_SIZE is the size of one CONCATN element.  */
  inner_size /= XVECLEN (op, 0);
  part = XVECEXP (op, 0, byte / inner_size);
  partmode = GET_MODE (part);

  /* The piece must lie wholly within the selected element.  */
  final_offset = byte % inner_size;
  if (final_offset + outer_size > inner_size)
    return NULL_RTX;

  /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
     regular CONST_VECTORs.  They have vector or integer modes, depending
     on the capabilities of the target.  Cope with them.  */
  if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
    partmode = GET_MODE_INNER (innermode);
  else if (partmode == VOIDmode)
    partmode = mode_for_size (inner_size * BITS_PER_UNIT,
			      GET_MODE_CLASS (innermode), 0).require ();

  return simplify_gen_subreg (outermode, part, partmode, final_offset);
}
663 
/* Wrapper around simplify_gen_subreg which handles CONCATN.  The
   parameters are as for simplify_gen_subreg; OP may be a CONCATN, or
   a SUBREG of a CONCATN, created by decompose_register.  */

static rtx
simplify_gen_subreg_concatn (machine_mode outermode, rtx op,
			     machine_mode innermode, unsigned int byte)
{
  rtx ret;

  /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
     If OP is a SUBREG of a CONCATN, then it must be a simple mode
     change with the same size and offset 0, or it must extract a
     part.  We shouldn't see anything else here.  */
  if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
    {
      rtx op2;

      /* A same-size, offset-0 SUBREG is a pure mode change: peel it
	 off and recurse on the CONCATN itself.  */
      if (known_eq (GET_MODE_SIZE (GET_MODE (op)),
		    GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
	  && known_eq (SUBREG_BYTE (op), 0))
	return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
					    GET_MODE (SUBREG_REG (op)), byte);

      op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
				     SUBREG_BYTE (op));
      if (op2 == NULL_RTX)
	{
	  /* We don't handle paradoxical subregs here.  */
	  gcc_assert (!paradoxical_subreg_p (outermode, GET_MODE (op)));
	  gcc_assert (!paradoxical_subreg_p (op));
	  /* The SUBREG straddles elements of the CONCATN: fold the two
	     byte offsets together and extract directly.  */
	  op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
					 byte + SUBREG_BYTE (op));
	  gcc_assert (op2 != NULL_RTX);
	  return op2;
	}

      op = op2;
      gcc_assert (op != NULL_RTX);
      gcc_assert (innermode == GET_MODE (op));
    }

  if (GET_CODE (op) == CONCATN)
    return simplify_subreg_concatn (outermode, op, byte);

  ret = simplify_gen_subreg (outermode, op, innermode, byte);

  /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
     resolve_simple_move will ask for the high part of the paradoxical
     subreg, which does not have a value.  Just return a zero.  */
  if (ret == NULL_RTX
      && paradoxical_subreg_p (op))
    return CONST0_RTX (outermode);

  gcc_assert (ret != NULL_RTX);
  return ret;
}
719 
720 /* Return whether we should resolve X into the registers into which it
721    was decomposed.  */
722 
723 static bool
724 resolve_reg_p (rtx x)
725 {
726   return GET_CODE (x) == CONCATN;
727 }
728 
729 /* Return whether X is a SUBREG of a register which we need to
730    resolve.  */
731 
732 static bool
733 resolve_subreg_p (rtx x)
734 {
735   if (GET_CODE (x) != SUBREG)
736     return false;
737   return resolve_reg_p (SUBREG_REG (x));
738 }
739 
/* Look for SUBREGs in *LOC which need to be decomposed.  INSN is the
   containing insn, or NULL_RTX when *LOC comes from a note or an
   address we cannot yet validate.  Replacements are queued in group
   mode via validate_change; the caller confirms them with
   apply_change_group.  Return true if we found a reference that
   cannot be replaced (the caller must remove the containing note);
   false otherwise.  */

static bool
resolve_subreg_use (rtx *loc, rtx insn)
{
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
    {
      rtx *loc = *iter;
      rtx x = *loc;
      if (resolve_subreg_p (x))
	{
	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				       SUBREG_BYTE (x));

	  /* It is possible for a note to contain a reference which we can
	     decompose.  In this case, return true to the caller to indicate
	     that the note must be removed.  */
	  if (!x)
	    {
	      gcc_assert (!insn);
	      return true;
	    }

	  validate_change (insn, loc, x, 1);
	  iter.skip_subrtxes ();
	}
      else if (resolve_reg_p (x))
	/* Return true to the caller to indicate that we found a direct
	   reference to a register which is being decomposed.  This can
	   happen inside notes, multiword shift or zero-extend
	   instructions.  */
	return true;
    }

  return false;
}
777 
778 /* Resolve any decomposed registers which appear in register notes on
779    INSN.  */
780 
781 static void
782 resolve_reg_notes (rtx_insn *insn)
783 {
784   rtx *pnote, note;
785 
786   note = find_reg_equal_equiv_note (insn);
787   if (note)
788     {
789       int old_count = num_validated_changes ();
790       if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX))
791 	remove_note (insn, note);
792       else
793 	if (old_count != num_validated_changes ())
794 	  df_notes_rescan (insn);
795     }
796 
797   pnote = &REG_NOTES (insn);
798   while (*pnote != NULL_RTX)
799     {
800       bool del = false;
801 
802       note = *pnote;
803       switch (REG_NOTE_KIND (note))
804 	{
805 	case REG_DEAD:
806 	case REG_UNUSED:
807 	  if (resolve_reg_p (XEXP (note, 0)))
808 	    del = true;
809 	  break;
810 
811 	default:
812 	  break;
813 	}
814 
815       if (del)
816 	*pnote = XEXP (note, 1);
817       else
818 	pnote = &XEXP (note, 1);
819     }
820 }
821 
822 /* Return whether X can be decomposed into subwords.  */
823 
824 static bool
825 can_decompose_p (rtx x)
826 {
827   if (REG_P (x))
828     {
829       unsigned int regno = REGNO (x);
830 
831       if (HARD_REGISTER_NUM_P (regno))
832 	{
833 	  unsigned int byte, num_bytes, num_words;
834 
835 	  if (!interesting_mode_p (GET_MODE (x), &num_bytes, &num_words))
836 	    return false;
837 	  for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
838 	    if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
839 	      return false;
840 	  return true;
841 	}
842       else
843 	return !bitmap_bit_p (subreg_context, regno);
844     }
845 
846   return true;
847 }
848 
849 /* Decompose the registers used in a simple move SET within INSN.  If
850    we don't change anything, return INSN, otherwise return the start
851    of the sequence of moves.  */
852 
853 static rtx_insn *
854 resolve_simple_move (rtx set, rtx_insn *insn)
855 {
856   rtx src, dest, real_dest;
857   rtx_insn *insns;
858   machine_mode orig_mode, dest_mode;
859   unsigned int orig_size, words;
860   bool pushing;
861 
862   src = SET_SRC (set);
863   dest = SET_DEST (set);
864   orig_mode = GET_MODE (dest);
865 
866   if (!interesting_mode_p (orig_mode, &orig_size, &words))
867     gcc_unreachable ();
868   gcc_assert (words > 1);
869 
870   start_sequence ();
871 
872   /* We have to handle copying from a SUBREG of a decomposed reg where
873      the SUBREG is larger than word size.  Rather than assume that we
874      can take a word_mode SUBREG of the destination, we copy to a new
875      register and then copy that to the destination.  */
876 
877   real_dest = NULL_RTX;
878 
879   if (GET_CODE (src) == SUBREG
880       && resolve_reg_p (SUBREG_REG (src))
881       && (maybe_ne (SUBREG_BYTE (src), 0)
882 	  || maybe_ne (orig_size, GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
883     {
884       real_dest = dest;
885       dest = gen_reg_rtx (orig_mode);
886       if (REG_P (real_dest))
887 	REG_ATTRS (dest) = REG_ATTRS (real_dest);
888     }
889 
890   /* Similarly if we are copying to a SUBREG of a decomposed reg where
891      the SUBREG is larger than word size.  */
892 
893   if (GET_CODE (dest) == SUBREG
894       && resolve_reg_p (SUBREG_REG (dest))
895       && (maybe_ne (SUBREG_BYTE (dest), 0)
896 	  || maybe_ne (orig_size,
897 		       GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
898     {
899       rtx reg, smove;
900       rtx_insn *minsn;
901 
902       reg = gen_reg_rtx (orig_mode);
903       minsn = emit_move_insn (reg, src);
904       smove = single_set (minsn);
905       gcc_assert (smove != NULL_RTX);
906       resolve_simple_move (smove, minsn);
907       src = reg;
908     }
909 
910   /* If we didn't have any big SUBREGS of decomposed registers, and
911      neither side of the move is a register we are decomposing, then
912      we don't have to do anything here.  */
913 
914   if (src == SET_SRC (set)
915       && dest == SET_DEST (set)
916       && !resolve_reg_p (src)
917       && !resolve_subreg_p (src)
918       && !resolve_reg_p (dest)
919       && !resolve_subreg_p (dest))
920     {
921       end_sequence ();
922       return insn;
923     }
924 
925   /* It's possible for the code to use a subreg of a decomposed
926      register while forming an address.  We need to handle that before
927      passing the address to emit_move_insn.  We pass NULL_RTX as the
928      insn parameter to resolve_subreg_use because we can not validate
929      the insn yet.  */
930   if (MEM_P (src) || MEM_P (dest))
931     {
932       int acg;
933 
934       if (MEM_P (src))
935 	resolve_subreg_use (&XEXP (src, 0), NULL_RTX);
936       if (MEM_P (dest))
937 	resolve_subreg_use (&XEXP (dest, 0), NULL_RTX);
938       acg = apply_change_group ();
939       gcc_assert (acg);
940     }
941 
942   /* If SRC is a register which we can't decompose, or has side
943      effects, we need to move via a temporary register.  */
944 
945   if (!can_decompose_p (src)
946       || side_effects_p (src)
947       || GET_CODE (src) == ASM_OPERANDS)
948     {
949       rtx reg;
950 
951       reg = gen_reg_rtx (orig_mode);
952 
953       if (AUTO_INC_DEC)
954 	{
955 	  rtx_insn *move = emit_move_insn (reg, src);
956 	  if (MEM_P (src))
957 	    {
958 	      rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
959 	      if (note)
960 		add_reg_note (move, REG_INC, XEXP (note, 0));
961 	    }
962 	}
963       else
964 	emit_move_insn (reg, src);
965 
966       src = reg;
967     }
968 
969   /* If DEST is a register which we can't decompose, or has side
970      effects, we need to first move to a temporary register.  We
971      handle the common case of pushing an operand directly.  We also
972      go through a temporary register if it holds a floating point
973      value.  This gives us better code on systems which can't move
974      data easily between integer and floating point registers.  */
975 
976   dest_mode = orig_mode;
977   pushing = push_operand (dest, dest_mode);
978   if (!can_decompose_p (dest)
979       || (side_effects_p (dest) && !pushing)
980       || (!SCALAR_INT_MODE_P (dest_mode)
981 	  && !resolve_reg_p (dest)
982 	  && !resolve_subreg_p (dest)))
983     {
984       if (real_dest == NULL_RTX)
985 	real_dest = dest;
986       if (!SCALAR_INT_MODE_P (dest_mode))
987 	dest_mode = int_mode_for_mode (dest_mode).require ();
988       dest = gen_reg_rtx (dest_mode);
989       if (REG_P (real_dest))
990 	REG_ATTRS (dest) = REG_ATTRS (real_dest);
991     }
992 
993   if (pushing)
994     {
995       unsigned int i, j, jinc;
996 
997       gcc_assert (orig_size % UNITS_PER_WORD == 0);
998       gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
999       gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);
1000 
1001       if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
1002 	{
1003 	  j = 0;
1004 	  jinc = 1;
1005 	}
1006       else
1007 	{
1008 	  j = words - 1;
1009 	  jinc = -1;
1010 	}
1011 
1012       for (i = 0; i < words; ++i, j += jinc)
1013 	{
1014 	  rtx temp;
1015 
1016 	  temp = copy_rtx (XEXP (dest, 0));
1017 	  temp = adjust_automodify_address_nv (dest, word_mode, temp,
1018 					       j * UNITS_PER_WORD);
1019 	  emit_move_insn (temp,
1020 			  simplify_gen_subreg_concatn (word_mode, src,
1021 						       orig_mode,
1022 						       j * UNITS_PER_WORD));
1023 	}
1024     }
1025   else
1026     {
1027       unsigned int i;
1028 
1029       if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
1030 	emit_clobber (dest);
1031 
1032       for (i = 0; i < words; ++i)
1033 	emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
1034 						     dest_mode,
1035 						     i * UNITS_PER_WORD),
1036 			simplify_gen_subreg_concatn (word_mode, src,
1037 						     orig_mode,
1038 						     i * UNITS_PER_WORD));
1039     }
1040 
1041   if (real_dest != NULL_RTX)
1042     {
1043       rtx mdest, smove;
1044       rtx_insn *minsn;
1045 
1046       if (dest_mode == orig_mode)
1047 	mdest = dest;
1048       else
1049 	mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
1050       minsn = emit_move_insn (real_dest, mdest);
1051 
1052   if (AUTO_INC_DEC && MEM_P (real_dest)
1053       && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
1054     {
1055       rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
1056       if (note)
1057 	add_reg_note (minsn, REG_INC, XEXP (note, 0));
1058     }
1059 
1060       smove = single_set (minsn);
1061       gcc_assert (smove != NULL_RTX);
1062 
1063       resolve_simple_move (smove, minsn);
1064     }
1065 
1066   insns = get_insns ();
1067   end_sequence ();
1068 
1069   copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);
1070 
1071   emit_insn_before (insns, insn);
1072 
1073   /* If we get here via self-recursion, then INSN is not yet in the insns
1074      chain and delete_insn will fail.  We only want to remove INSN from the
1075      current sequence.  See PR56738.  */
1076   if (in_sequence_p ())
1077     remove_insn (insn);
1078   else
1079     delete_insn (insn);
1080 
1081   return insns;
1082 }
1083 
1084 /* Change a CLOBBER of a decomposed register into a CLOBBER of the
1085    component registers.  Return whether we changed something.  */
1086 
1087 static bool
1088 resolve_clobber (rtx pat, rtx_insn *insn)
1089 {
1090   rtx reg;
1091   machine_mode orig_mode;
1092   unsigned int orig_size, words, i;
1093   int ret;
1094 
1095   reg = XEXP (pat, 0);
1096   if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
1097     return false;
1098 
1099   orig_mode = GET_MODE (reg);
1100   if (!interesting_mode_p (orig_mode, &orig_size, &words))
1101     gcc_unreachable ();
1102 
1103   ret = validate_change (NULL_RTX, &XEXP (pat, 0),
1104 			 simplify_gen_subreg_concatn (word_mode, reg,
1105 						      orig_mode, 0),
1106 			 0);
1107   df_insn_rescan (insn);
1108   gcc_assert (ret != 0);
1109 
1110   for (i = words - 1; i > 0; --i)
1111     {
1112       rtx x;
1113 
1114       x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
1115 				       i * UNITS_PER_WORD);
1116       x = gen_rtx_CLOBBER (VOIDmode, x);
1117       emit_insn_after (x, insn);
1118     }
1119 
1120   resolve_reg_notes (insn);
1121 
1122   return true;
1123 }
1124 
1125 /* A USE of a decomposed register is no longer meaningful.  Return
1126    whether we changed something.  */
1127 
1128 static bool
1129 resolve_use (rtx pat, rtx_insn *insn)
1130 {
1131   if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1132     {
1133       delete_insn (insn);
1134       return true;
1135     }
1136 
1137   resolve_reg_notes (insn);
1138 
1139   return false;
1140 }
1141 
1142 /* A VAR_LOCATION can be simplified.  */
1143 
1144 static void
1145 resolve_debug (rtx_insn *insn)
1146 {
1147   subrtx_ptr_iterator::array_type array;
1148   FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
1149     {
1150       rtx *loc = *iter;
1151       rtx x = *loc;
1152       if (resolve_subreg_p (x))
1153 	{
1154 	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
1155 				       SUBREG_BYTE (x));
1156 
1157 	  if (x)
1158 	    *loc = x;
1159 	  else
1160 	    x = copy_rtx (*loc);
1161 	}
1162       if (resolve_reg_p (x))
1163 	*loc = copy_rtx (x);
1164     }
1165 
1166   df_insn_rescan (insn);
1167 
1168   resolve_reg_notes (insn);
1169 }
1170 
1171 /* Check if INSN is a decomposable multiword-shift or zero-extend and
1172    set the decomposable_context bitmap accordingly.  SPEED_P is true
1173    if we are optimizing INSN for speed rather than size.  Return true
1174    if INSN is decomposable.  */
1175 
1176 static bool
1177 find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
1178 {
1179   rtx set;
1180   rtx op;
1181   rtx op_operand;
1182 
1183   set = single_set (insn);
1184   if (!set)
1185     return false;
1186 
1187   op = SET_SRC (set);
1188   if (GET_CODE (op) != ASHIFT
1189       && GET_CODE (op) != LSHIFTRT
1190       && GET_CODE (op) != ASHIFTRT
1191       && GET_CODE (op) != ZERO_EXTEND)
1192     return false;
1193 
1194   op_operand = XEXP (op, 0);
1195   if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1196       || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1197       || HARD_REGISTER_NUM_P (REGNO (op_operand))
1198       || GET_MODE (op) != twice_word_mode)
1199     return false;
1200 
1201   if (GET_CODE (op) == ZERO_EXTEND)
1202     {
1203       if (GET_MODE (op_operand) != word_mode
1204 	  || !choices[speed_p].splitting_zext)
1205 	return false;
1206     }
1207   else /* left or right shift */
1208     {
1209       bool *splitting = (GET_CODE (op) == ASHIFT
1210 			 ? choices[speed_p].splitting_ashift
1211 			 : GET_CODE (op) == ASHIFTRT
1212 			 ? choices[speed_p].splitting_ashiftrt
1213 			 : choices[speed_p].splitting_lshiftrt);
1214       if (!CONST_INT_P (XEXP (op, 1))
1215 	  || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1216 			2 * BITS_PER_WORD - 1)
1217 	  || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1218 	return false;
1219 
1220       bitmap_set_bit (decomposable_context, REGNO (op_operand));
1221     }
1222 
1223   bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1224 
1225   return true;
1226 }
1227 
/* Decompose a more than word wide shift (in INSN) of a multiword
   pseudo or a multiword zero-extend of a wordmode pseudo into a move
   and 'set to zero' insn.  Return a pointer to the new insn when a
   replacement was done.

   Such operations can be expressed in word-sized pieces: the word
   holding the interesting bits is (possibly shifted and) moved into
   one destination word, while the other destination word is either
   zeroed or filled with sign-bit copies.  */

static rtx_insn *
resolve_shift_zext (rtx_insn *insn)
{
  rtx set;
  rtx op;
  rtx op_operand;
  rtx_insn *insns;
  rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
  int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
  scalar_int_mode inner_mode;

  set = single_set (insn);
  if (!set)
    return NULL;

  /* Only the four operation codes selected by
     find_decomposable_shift_zext are handled here.  */
  op = SET_SRC (set);
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
      && GET_CODE (op) != ASHIFTRT
      && GET_CODE (op) != ZERO_EXTEND)
    return NULL;

  op_operand = XEXP (op, 0);
  if (!is_a <scalar_int_mode> (GET_MODE (op_operand), &inner_mode))
    return NULL;

  /* We can tear this operation apart only if the regs were already
     torn apart.  */
  if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
    return NULL;

  /* src_reg_num is the number of the word mode register which we
     are operating on.  For a left shift and a zero_extend on little
     endian machines this is register 0.  */
  src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
		? 1 : 0;

  /* On big-endian targets the word numbering within a multiword
     source is reversed.  */
  if (WORDS_BIG_ENDIAN && GET_MODE_SIZE (inner_mode) > UNITS_PER_WORD)
    src_reg_num = 1 - src_reg_num;

  if (GET_CODE (op) == ZERO_EXTEND)
    dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
  else
    dest_reg_num = 1 - src_reg_num;

  /* Byte offsets of the destination word that receives the data, the
     other destination word, and the source word.  */
  offset1 = UNITS_PER_WORD * dest_reg_num;
  offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
  src_offset = UNITS_PER_WORD * src_reg_num;

  start_sequence ();

  dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
                                          GET_MODE (SET_DEST (set)),
                                          offset1);
  dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
					    GET_MODE (SET_DEST (set)),
					    offset2);
  src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
                                         GET_MODE (op_operand),
                                         src_offset);
  /* For an arithmetic right shift the upper destination word is
     filled with sign-bit copies; compute them up front unless the
     shift count makes both words equal to the sign extension.  */
  if (GET_CODE (op) == ASHIFTRT
      && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
    upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
			      BITS_PER_WORD - 1, NULL_RTX, 0);

  /* A shift count greater than BITS_PER_WORD leaves a residual
     word-sized shift to apply to the source word.  */
  if (GET_CODE (op) != ZERO_EXTEND)
    {
      int shift_count = INTVAL (XEXP (op, 1));
      if (shift_count > BITS_PER_WORD)
	src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
				LSHIFT_EXPR : RSHIFT_EXPR,
				word_mode, src_reg,
				shift_count - BITS_PER_WORD,
				dest_reg, GET_CODE (op) != ASHIFTRT);
    }

  /* expand_shift may already have placed the result in dest_reg.  */
  if (dest_reg != src_reg)
    emit_move_insn (dest_reg, src_reg);
  if (GET_CODE (op) != ASHIFTRT)
    emit_move_insn (dest_upper, CONST0_RTX (word_mode));
  else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
    emit_move_insn (dest_upper, copy_rtx (src_reg));
  else
    emit_move_insn (dest_upper, upper_src);
  insns = get_insns ();

  end_sequence ();

  emit_insn_before (insns, insn);

  if (dump_file)
    {
      rtx_insn *in;
      fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
      for (in = insns; in != insn; in = NEXT_INSN (in))
	fprintf (dump_file, "%d ", INSN_UID (in));
      fprintf (dump_file, "\n");
    }

  delete_insn (insn);
  return insns;
}
1335 
1336 /* Print to dump_file a description of what we're doing with shift code CODE.
1337    SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD.  */
1338 
1339 static void
1340 dump_shift_choices (enum rtx_code code, bool *splitting)
1341 {
1342   int i;
1343   const char *sep;
1344 
1345   fprintf (dump_file,
1346 	   "  Splitting mode %s for %s lowering with shift amounts = ",
1347 	   GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1348   sep = "";
1349   for (i = 0; i < BITS_PER_WORD; i++)
1350     if (splitting[i])
1351       {
1352 	fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1353 	sep = ",";
1354       }
1355   fprintf (dump_file, "\n");
1356 }
1357 
1358 /* Print to dump_file a description of what we're doing when optimizing
1359    for speed or size; SPEED_P says which.  DESCRIPTION is a description
1360    of the SPEED_P choice.  */
1361 
1362 static void
1363 dump_choices (bool speed_p, const char *description)
1364 {
1365   unsigned int size, factor, i;
1366 
1367   fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1368 
1369   for (i = 0; i < MAX_MACHINE_MODE; i++)
1370     if (interesting_mode_p ((machine_mode) i, &size, &factor)
1371 	&& factor > 1)
1372       fprintf (dump_file, "  %s mode %s for copy lowering.\n",
1373 	       choices[speed_p].move_modes_to_split[i]
1374 	       ? "Splitting"
1375 	       : "Skipping",
1376 	       GET_MODE_NAME ((machine_mode) i));
1377 
1378   fprintf (dump_file, "  %s mode %s for zero_extend lowering.\n",
1379 	   choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1380 	   GET_MODE_NAME (twice_word_mode));
1381 
1382   dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1383   dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1384   dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
1385   fprintf (dump_file, "\n");
1386 }
1387 
/* Look for registers which are always accessed via word-sized SUBREGs
   or -if DECOMPOSE_COPIES is true- via copies.  Decompose these
   registers into several word-sized pseudo-registers.

   This is the driver for the whole pass.  It runs in two phases:
   first a scan over all insns that classifies each multiword pseudo
   as decomposable or not, then (if anything was found) a second pass
   that rewrites the affected insns in place.  */

static void
decompose_multiword_subregs (bool decompose_copies)
{
  unsigned int max;
  basic_block bb;
  bool speed_p;

  if (dump_file)
    {
      dump_choices (false, "size");
      dump_choices (true, "speed");
    }

  /* Check if this target even has any modes to consider lowering.   */
  if (!choices[false].something_to_do && !choices[true].something_to_do)
    {
      if (dump_file)
	fprintf (dump_file, "Nothing to do!\n");
      return;
    }

  max = max_reg_num ();

  /* First see if there are any multi-word pseudo-registers.  If there
     aren't, there is nothing we can do.  This should speed up this
     pass in the normal case, since it should be faster than scanning
     all the insns.  */
  {
    unsigned int i;
    bool useful_modes_seen = false;

    for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
      if (regno_reg_rtx[i] != NULL)
	{
	  machine_mode mode = GET_MODE (regno_reg_rtx[i]);
	  if (choices[false].move_modes_to_split[(int) mode]
	      || choices[true].move_modes_to_split[(int) mode])
	    {
	      useful_modes_seen = true;
	      break;
	    }
	}

    if (!useful_modes_seen)
      {
	if (dump_file)
	  fprintf (dump_file, "Nothing to lower in this function.\n");
	return;
      }
  }

  /* Dataflow information is only available in the later runs of this
     pass; use it to remove dead word-sized stores first.  */
  if (df)
    {
      df_set_flags (DF_DEFER_INSN_RESCAN);
      run_word_dce ();
    }

  /* FIXME: It may be possible to change this code to look for each
     multi-word pseudo-register and to find each insn which sets or
     uses that register.  That should be faster than scanning all the
     insns.  */

  decomposable_context = BITMAP_ALLOC (NULL);
  non_decomposable_context = BITMAP_ALLOC (NULL);
  subreg_context = BITMAP_ALLOC (NULL);

  reg_copy_graph.create (max);
  reg_copy_graph.safe_grow_cleared (max);
  memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);

  /* Phase 1: scan all insns and classify every multiword pseudo.  */
  speed_p = optimize_function_for_speed_p (cfun);
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      FOR_BB_INSNS (bb, insn)
	{
	  rtx set;
	  enum classify_move_insn cmi;
	  int i, n;

	  if (!INSN_P (insn)
	      || GET_CODE (PATTERN (insn)) == CLOBBER
	      || GET_CODE (PATTERN (insn)) == USE)
	    continue;

	  recog_memoized (insn);

	  if (find_decomposable_shift_zext (insn, speed_p))
	    continue;

	  extract_insn (insn);

	  set = simple_move (insn, speed_p);

	  if (!set)
	    cmi = NOT_SIMPLE_MOVE;
	  else
	    {
	      /* We mark pseudo-to-pseudo copies as decomposable during the
		 second pass only.  The first pass is so early that there is
		 good chance such moves will be optimized away completely by
		 subsequent optimizations anyway.

		 However, we call find_pseudo_copy even during the first pass
		 so as to properly set up the reg_copy_graph.  */
	      if (find_pseudo_copy (set))
		cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
	      else
		cmi = SIMPLE_MOVE;
	    }

	  n = recog_data.n_operands;
	  for (i = 0; i < n; ++i)
	    {
	      find_decomposable_subregs (&recog_data.operand[i], &cmi);

	      /* We handle ASM_OPERANDS as a special case to support
		 things like x86 rdtsc which returns a DImode value.
		 We can decompose the output, which will certainly be
		 operand 0, but not the inputs.  */

	      if (cmi == SIMPLE_MOVE
		  && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
		{
		  gcc_assert (i == 0);
		  cmi = NOT_SIMPLE_MOVE;
		}
	    }
	}
    }

  /* Phase 2: anything seen in a non-decomposable context loses.  */
  bitmap_and_compl_into (decomposable_context, non_decomposable_context);
  if (!bitmap_empty_p (decomposable_context))
    {
      unsigned int i;
      sbitmap_iterator sbi;
      bitmap_iterator iter;
      unsigned int regno;

      propagate_pseudo_copies ();

      auto_sbitmap sub_blocks (last_basic_block_for_fn (cfun));
      bitmap_clear (sub_blocks);

      EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
	decompose_register (regno);

      /* Rewrite every insn that touches a decomposed register.  */
      FOR_EACH_BB_FN (bb, cfun)
	{
	  rtx_insn *insn;

	  FOR_BB_INSNS (bb, insn)
	    {
	      rtx pat;

	      if (!INSN_P (insn))
		continue;

	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == CLOBBER)
		resolve_clobber (pat, insn);
	      else if (GET_CODE (pat) == USE)
		resolve_use (pat, insn);
	      else if (DEBUG_INSN_P (insn))
		resolve_debug (insn);
	      else
		{
		  rtx set;
		  int i;

		  recog_memoized (insn);
		  extract_insn (insn);

		  set = simple_move (insn, speed_p);
		  if (set)
		    {
		      rtx_insn *orig_insn = insn;
		      bool cfi = control_flow_insn_p (insn);

		      /* We can end up splitting loads to multi-word pseudos
			 into separate loads to machine word size pseudos.
			 When this happens, we first had one load that can
			 throw, and after resolve_simple_move we'll have a
			 bunch of loads (at least two).  All those loads may
			 trap if we can have non-call exceptions, so they
			 all will end the current basic block.  We split the
			 block after the outer loop over all insns, but we
			 make sure here that we will be able to split the
			 basic block and still produce the correct control
			 flow graph for it.  */
		      gcc_assert (!cfi
				  || (cfun->can_throw_non_call_exceptions
				      && can_throw_internal (insn)));

		      insn = resolve_simple_move (set, insn);
		      if (insn != orig_insn)
			{
			  recog_memoized (insn);
			  extract_insn (insn);

			  if (cfi)
			    bitmap_set_bit (sub_blocks, bb->index);
			}
		    }
		  else
		    {
		      rtx_insn *decomposed_shift;

		      decomposed_shift = resolve_shift_zext (insn);
		      if (decomposed_shift != NULL_RTX)
			{
			  insn = decomposed_shift;
			  recog_memoized (insn);
			  extract_insn (insn);
			}
		    }

		  for (i = recog_data.n_operands - 1; i >= 0; --i)
		    resolve_subreg_use (recog_data.operand_loc[i], insn);

		  resolve_reg_notes (insn);

		  /* Keep matching (dup) operands in sync with the
		     operands we just rewrote.  */
		  if (num_validated_changes () > 0)
		    {
		      for (i = recog_data.n_dups - 1; i >= 0; --i)
			{
			  rtx *pl = recog_data.dup_loc[i];
			  int dup_num = recog_data.dup_num[i];
			  rtx *px = recog_data.operand_loc[dup_num];

			  validate_unshare_change (insn, pl, *px, 1);
			}

		      i = apply_change_group ();
		      gcc_assert (i);
		    }
		}
	    }
	}

      /* If we had insns to split that caused control flow insns in the middle
	 of a basic block, split those blocks now.  Note that we only handle
	 the case where splitting a load has caused multiple possibly trapping
	 loads to appear.  */
      EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
	{
	  rtx_insn *insn, *end;
	  edge fallthru;

	  bb = BASIC_BLOCK_FOR_FN (cfun, i);
	  insn = BB_HEAD (bb);
	  end = BB_END (bb);

	  while (insn != end)
	    {
	      if (control_flow_insn_p (insn))
		{
		  /* Split the block after insn.  There will be a fallthru
		     edge, which is OK so we keep it.  We have to create the
		     exception edges ourselves.  */
		  fallthru = split_block (bb, insn);
		  rtl_make_eh_edge (NULL, bb, BB_END (bb));
		  bb = fallthru->dest;
		  insn = BB_HEAD (bb);
		}
	      else
	        insn = NEXT_INSN (insn);
	    }
	}
    }

  /* Release the per-register copy bitmaps and the context bitmaps.  */
  {
    unsigned int i;
    bitmap b;

    FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
      if (b)
	BITMAP_FREE (b);
  }

  reg_copy_graph.release ();

  BITMAP_FREE (decomposable_context);
  BITMAP_FREE (non_decomposable_context);
  BITMAP_FREE (subreg_context);
}
1679 
/* Implement first lower subreg pass.  This run does not decompose
   pseudo-to-pseudo copies (decompose_multiword_subregs is called with
   DECOMPOSE_COPIES false).  */

namespace {

const pass_data pass_data_lower_subreg =
{
  RTL_PASS, /* type */
  "subreg1", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_lower_subreg : public rtl_opt_pass
{
public:
  pass_lower_subreg (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when -fsplit-wide-types is enabled.  */
  virtual bool gate (function *) { return flag_split_wide_types != 0; }
  virtual unsigned int execute (function *)
    {
      decompose_multiword_subregs (false);
      return 0;
    }

}; // class pass_lower_subreg

} // anon namespace
1715 
1716 rtl_opt_pass *
1717 make_pass_lower_subreg (gcc::context *ctxt)
1718 {
1719   return new pass_lower_subreg (ctxt);
1720 }
1721 
/* Implement second lower subreg pass.  This run also decomposes
   pseudo-to-pseudo copies (decompose_multiword_subregs is called with
   DECOMPOSE_COPIES true) and finishes dataflow afterwards.  */

namespace {

const pass_data pass_data_lower_subreg2 =
{
  RTL_PASS, /* type */
  "subreg2", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_lower_subreg2 : public rtl_opt_pass
{
public:
  pass_lower_subreg2 (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when -fsplit-wide-types is enabled.  */
  virtual bool gate (function *) { return flag_split_wide_types != 0; }
  virtual unsigned int execute (function *)
    {
      decompose_multiword_subregs (true);
      return 0;
    }

}; // class pass_lower_subreg2

} // anon namespace
1757 
1758 rtl_opt_pass *
1759 make_pass_lower_subreg2 (gcc::context *ctxt)
1760 {
1761   return new pass_lower_subreg2 (ctxt);
1762 }
1763